********************************************************************************
/*
NSF Grant: SciSIP #1548288
	https://www.nsf.gov/awardsearch/showAward?AWD_ID=1548288
	
Paper: Influences of Academic Institutional Factors on R&D Funding for Graduate Students
	https://doi.org/10.1093/scipol/scx017

Data Building: Refer to Data Appendix for Overview of Data Construction for Program-Level Dataset

Acronyms:
	NSF GRFP: National Science Foundation Graduate Research Fellowship Program
	IPEDS: Integrated Postsecondary Education Data System
	NRC: National Research Council


Prep NSF GRFP Data
	STEP 1: NSF GRFP Awardees and Honorable Mentions: Clean up university string (Lines: 36 - 290)
	STEP 2: IPEDS: Clean up university string (Lines: 291 - 429)
	STEP 3: Merge IPEDS list to GRFP Proposed and Current Institution (Lines: 430 - 1074)

Prep NRC Data
	STEP 4: NRC: Clean up university string (Lines: 1075 - 1186)
	STEP 5: Merge IPEDS to NRC Institution (Lines: 1187 - 1509)

Merge NSF GRFP and NRC University-Program Level Data
	STEP 6: GRFP & NRC Program Crosswalk (respectively) (Lines: 1510 - 2160)
	STEP 7: NSF GRFP & NRC Merge (by institution and program) (Lines: 2161 - 2386)
	STEP 8: Prep NRC data for university-program merge (Lines: 2387 - 2872)
	STEP 9: MERGE NRC & NSF GRFP Data (Lines: 2873 - 2886)

Primary Author of Data Building: Lauren Lanahan (University of Oregon, www.laurenlanahan.com)
*/
********************************************************************************

***** STEP 1: NSF GRFP Awardees and Honorable Mentions: Clean up university string
{
* Source list of awardees (Offered Award & Honorable Mention):
*   https://www.fastlane.nsf.gov/grfp/AwardeeList.do?method=loadAwardeeList
*   - Offered Award: complete data from 1952 - 2014
*   - Honorable Mention: complete data from 1994 - 2014
clear all
global dir "<set according to user>"

* Stack the two award lists into one dataset
use "$dir/GRFP/Offered Award.dta"
append using "$dir/GRFP/Honorable Mention.dta"

* Keep award years 1994 onward and remove rows whose Name field is the literal
* text "Name" (presumably header rows carried into the data -- confirm)
drop if YearAward < 1994
drop if Name == "Name"
** Author's counts: 58218 unique observations (58245 is listed as those reporting
** information; total population 58675), i.e. this dataset includes 99.22 percent
** of award recipients and honorable mentions
********************************************************************************
** Standardize variable names **
rename Name PI
rename BaccalaureateInstitution baccalaureate
rename FieldofStudy department
rename ProposedGraduateInstitution grfp_uni
rename CurrentInstitution current_uni
rename YearAward grfp_year
rename AwardType type

** Variable labels **
label variable PI "Principal Investigator"
label variable baccalaureate "Baccalaureate Institution"
label variable department "Field of Study"
label variable grfp_uni "Proposed GRFP Graduate Institution"
label variable current_uni "Current Institution"
label variable grfp_year "GRFP Year Award"
label variable type "Award Type: Offered Award or Honorable Mention"

** Binary indicator: 1 = offered award, 0 = any other award type
generate offered_award = cond(type == "Offered Award", 1, 0)
label variable offered_award "Type Binary (offered award == 1)"

** Row identifier that records the original order of the appended data
generate unique_counter_grfp = _n
sort unique_counter_grfp
********************************************************************************
** For the 2014 list, overwrite the proposed institution with the current institution **
replace grfp_uni = current_uni if grfp_year == 2014
********************************************************************************
** PROPOSED INSTITUTION CLEANING
** Purpose: turn the free-text proposed institution (grfp_uni) into a normalized,
** space-free key (uni_clns_to_match) that is merged against IPEDS in STEP 3.
** Steps: lower-case and collapse blanks; keep the text before the first '/';
** apply an ORDERED list of substring substitutions; strip all spaces.
**
** IMPORTANT: the substitution rules are order dependent and deliberately left
** unchanged. Several rules also fire inside longer words (e.g. "calif" inside
** "california" yields "californiaornia"; "uni" inside "community" yields
** "commuty"). The hand-match strings in STEP 3 are written against exactly these
** outputs, so correcting a rule here would silently break those matches.
**
** Author's note: 1298 unique 'clean' institutions in dataset
** NOTE(review): the post-grouping note below reports 1398 original groups --
** confirm which count is current.
set more off
gen grfp_uni_clean_full = trim(itrim(lower(grfp_uni)))
br grfp_uni_clean_full

** Clean up name of institution
gen grfp_uni_clean1 = grfp_uni_clean_full
split grfp_uni_clean1, p("/")
** USE grfp_uni_clean11; assume first part is primary
** (the three label commands below assume split produced at least three parts)
lab var grfp_uni_clean11 "string portion of grfp_uni_clean1 before '/'"
lab var grfp_uni_clean12 "string portion of grfp_uni_clean1 after '/'"
lab var grfp_uni_clean13 "string portion of grfp_uni_clean1 after 2nd '/'"
gen grfp_uni_to_clean = grfp_uni_clean11
foreach x in grfp_uni_to_clean {
* -- connecting words and ampersand (removing " of " etc. also removes the spaces around them)
replace `x' = subinstr(`x', " of ", "", .)
replace `x' = subinstr(`x', " at ", "", .)
replace `x' = subinstr(`x', "coll ", "college", .)
replace `x' = subinstr(`x', "col ", "college", .)
replace `x' = subinstr(`x', " sci ", "science", .)
replace `x' = subinstr(`x', " the ", "", .)
replace `x' = subinstr(`x', " and ", "", .)
replace `x' = subinstr(`x', "&", "", .)
* -- university / institute abbreviations (longest form first)
replace `x' = subinstr(`x', "university", "u", .)
replace `x' = subinstr(`x', "univ", "u", .)
replace `x' = subinstr(`x', "uni", "u", .)
replace `x' = subinstr(`x', "institute", "i", .)
replace `x' = subinstr(`x', "institution", "i", .)
replace `x' = subinstr(`x', "insts", "i", .)
replace `x' = subinstr(`x', "inst", "i", .)
replace `x' = subinstr(`x', " ins", "i", .)
replace `x' = subinstr(`x', "technology", "tech", .)
* -- punctuation
replace `x' = subinstr(`x', "-", "", .)
replace `x' = subinstr(`x', ",", "", .)
replace `x' = subinstr(`x', "/", "", .)
replace `x' = subinstr(`x', "(", "", .)
replace `x' = subinstr(`x', ")", "", .)
replace `x' = subinstr(`x', ".", "", .)
* -- common words and abbreviations
replace `x' = subinstr(`x', "state", "st", .)
replace `x' = subinstr(`x', " s ", "st", .)
replace `x' = subinstr(`x', "school", "sch", .)
replace `x' = subinstr(`x', "schl", "sch", .)
replace `x' = subinstr(`x', "centr", "center", .)
replace `x' = subinstr(`x', "ctr", "center", .)
replace `x' = subinstr(`x', "calif", "california", .)
replace `x' = subinstr(`x', "rsrve", "reserve", .)
replace `x' = subinstr(`x', "medical", "med", .)
replace `x' = subinstr(`x', "medicine", "med", .)
replace `x' = subinstr(`x', "ga ", "georgia", .)
replace `x' = subinstr(`x', "hlth", "health", .)
replace `x' = subinstr(`x', "laboratory", "lab", .)
replace `x' = subinstr(`x', "labs", "lab", .)
replace `x' = subinstr(`x', "physicians", "phys", .)
replace `x' = subinstr(`x', "physician", "phys", .)
replace `x' = subinstr(`x', "agriculture", "ag", .)
replace `x' = subinstr(`x', "agricultural", "ag", .)
* second " sci " pass: runs after the punctuation has been removed
replace `x' = subinstr(`x', " sci ", "science", .)
replace `x' = subinstr(`x', "sciences", "science", .)
replace `x' = subinstr(`x', "int'l", "international", .)
replace `x' = subinstr(`x', "intl", "international", .)
replace `x' = subinstr(`x', " engineering ", "eng", .)
replace `x' = subinstr(`x', " cnty ", "county", .)
replace `x' = subinstr(`x', " okla ", "oklahoma", .)
replace `x' = subinstr(`x', " va ", "virginia", .)
replace `x' = subinstr(`x', " ill ", "illinois", .)
replace `x' = subinstr(`x', "@", "", .)
replace `x' = subinstr(`x', "canada", "", .)
replace `x' = subinstr(`x', "polytechnic", "poly", .)
replace `x' = subinstr(`x', "polytech", "poly", .)
replace `x' = subinstr(`x', "saint", "st", .)
* for GRFP only: this may not work if there is no space at the front of the string
replace `x' = subinstr(`x', " gy ", "", .)
replace `x' = subinstr(`x', "obi ", "", .)
replace `x' = subinstr(`x', " l ", "", .)
replace `x' = subinstr(`x', " e ", "", .)
replace `x' = subinstr(`x', " pa ", "", .)
replace `x' = subinstr(`x', " us ", "", .)
replace `x' = subinstr(`x', " y ", "", .)
* remove spaces
gen `x'_nospace = subinstr(`x', " ","", .)
sort `x'_nospace
egen grfp_group_clean = group(`x')
egen grfp_group_clns = group(`x'_nospace)
}
rename grfp_uni_to_clean grfp_uni_cleaned
rename grfp_uni_to_clean_nospace grfp_uni_clns
egen grfp_group_original = group(grfp_uni_clean_full)
lab var grfp_group_original "Unique uni group ID, based on full grfp name"
lab var grfp_group_clean "Unique uni group ID, based on clean grfp name"
lab var grfp_group_clns "Unique uni group ID, based on clean grfp name with no spaces"
br grfp_uni_clean_full grfp_uni_cl*
sum grfp_group*
** 1398 original groups, 1055 cleaned groups, 974 cleaned & nospace **
* Within-university counter and tally. unique_counter_grfp is added to the sort
* key so that the order inside each group (and therefore the counter assigned
* by _n) is the original data order rather than an arbitrary tie order.
sort grfp_group_clns unique_counter_grfp
by grfp_group_clns: gen grfp_clns_counter = _n
by grfp_group_clns: gen grfp_clns_tally = _N
lab var grfp_clns_tally "number of obs within university, derived from grfp clean no space data"
gen uni_clns_to_match = grfp_uni_clns
** uni_clns_to_match: MATCH WITH IPEDS **
********************************************************************************
	** CLEAN UP CURRENT & BACCALAUREATE INSTITUTION ** 
********************************************************************************
** Purpose: repeat the proposed-institution cleaning for the baccalaureate (bac)
** and current (cur) institution strings, producing bac_clns_to_match and
** cur_clns_to_match for the IPEDS merge.
**
** IMPORTANT: the substitution rules are order dependent and must stay identical
** to the ones used for the proposed institution and for IPEDS; they are left
** unchanged here because later hand-matching relies on their exact output.
**
** Changes relative to the earlier version of this block:
**   - the "after 2nd '/'" label is applied to grfp_`y'_clean13 of the CURRENT
**     loop pass (it was hard-coded to grfp_bac_clean13, so the cur pass
**     overwrote the bac label), and only if split actually created that part;
**   - the within-group sort is tie-broken on unique_counter_grfp so the
**     counter built from _n is reproducible.
set more off
gen grfp_bac = baccalaureate
gen grfp_cur = current_uni
foreach y in bac cur {
gen grfp_`y'_clean_full = trim(itrim(lower(grfp_`y')))
br grfp_`y'_clean_full
** Clean up name of institution
gen grfp_`y'_clean1 = grfp_`y'_clean_full
split grfp_`y'_clean1, p("/")
** USE grfp_`y'_clean11; assume first part is primary
lab var grfp_`y'_clean11 "string portion of grfp_`y'_clean1 before '/'"
lab var grfp_`y'_clean12 "string portion of grfp_`y'_clean1 after '/'"
* a third part exists only when some name contains two or more '/'
capture confirm variable grfp_`y'_clean13
if !_rc {
lab var grfp_`y'_clean13 "string portion of grfp_`y'_clean1 after 2nd '/'"
}
gen grfp_`y'_to_clean = grfp_`y'_clean11
foreach x in grfp_`y'_to_clean {
* -- connecting words and ampersand
replace `x' = subinstr(`x', " of ", "", .)
replace `x' = subinstr(`x', " at ", "", .)
replace `x' = subinstr(`x', "coll ", "college", .)
replace `x' = subinstr(`x', "col ", "college", .)
replace `x' = subinstr(`x', " sci ", "science", .)
replace `x' = subinstr(`x', " the ", "", .)
replace `x' = subinstr(`x', " and ", "", .)
replace `x' = subinstr(`x', "&", "", .)
* -- university / institute abbreviations (longest form first)
replace `x' = subinstr(`x', "university", "u", .)
replace `x' = subinstr(`x', "univ", "u", .)
replace `x' = subinstr(`x', "uni", "u", .)
replace `x' = subinstr(`x', "institute", "i", .)
replace `x' = subinstr(`x', "institution", "i", .)
replace `x' = subinstr(`x', "insts", "i", .)
replace `x' = subinstr(`x', "inst", "i", .)
replace `x' = subinstr(`x', " ins", "i", .)
replace `x' = subinstr(`x', "technology", "tech", .)
* -- punctuation
replace `x' = subinstr(`x', "-", "", .)
replace `x' = subinstr(`x', ",", "", .)
replace `x' = subinstr(`x', "/", "", .)
replace `x' = subinstr(`x', "(", "", .)
replace `x' = subinstr(`x', ")", "", .)
replace `x' = subinstr(`x', ".", "", .)
* -- common words and abbreviations
replace `x' = subinstr(`x', "state", "st", .)
replace `x' = subinstr(`x', " s ", "st", .)
replace `x' = subinstr(`x', "school", "sch", .)
replace `x' = subinstr(`x', "schl", "sch", .)
replace `x' = subinstr(`x', "centr", "center", .)
replace `x' = subinstr(`x', "ctr", "center", .)
replace `x' = subinstr(`x', "calif", "california", .)
replace `x' = subinstr(`x', "rsrve", "reserve", .)
replace `x' = subinstr(`x', "medical", "med", .)
replace `x' = subinstr(`x', "medicine", "med", .)
replace `x' = subinstr(`x', "ga ", "georgia", .)
replace `x' = subinstr(`x', "hlth", "health", .)
replace `x' = subinstr(`x', "laboratory", "lab", .)
replace `x' = subinstr(`x', "labs", "lab", .)
replace `x' = subinstr(`x', "physicians", "phys", .)
replace `x' = subinstr(`x', "physician", "phys", .)
replace `x' = subinstr(`x', "agriculture", "ag", .)
replace `x' = subinstr(`x', "agricultural", "ag", .)
* second " sci " pass: runs after the punctuation has been removed
replace `x' = subinstr(`x', " sci ", "science", .)
replace `x' = subinstr(`x', "sciences", "science", .)
replace `x' = subinstr(`x', "int'l", "international", .)
replace `x' = subinstr(`x', "intl", "international", .)
replace `x' = subinstr(`x', " engineering ", "eng", .)
replace `x' = subinstr(`x', " cnty ", "county", .)
replace `x' = subinstr(`x', " okla ", "oklahoma", .)
replace `x' = subinstr(`x', " va ", "virginia", .)
replace `x' = subinstr(`x', " ill ", "illinois", .)
replace `x' = subinstr(`x', "@", "", .)
replace `x' = subinstr(`x', "canada", "", .)
replace `x' = subinstr(`x', "polytechnic", "poly", .)
replace `x' = subinstr(`x', "polytech", "poly", .)
replace `x' = subinstr(`x', "saint", "st", .)
* for GRFP only: this may not work if there is no space at the front of the string
replace `x' = subinstr(`x', " gy ", "", .)
replace `x' = subinstr(`x', "obi ", "", .)
replace `x' = subinstr(`x', " l ", "", .)
replace `x' = subinstr(`x', " e ", "", .)
replace `x' = subinstr(`x', " pa ", "", .)
replace `x' = subinstr(`x', " us ", "", .)
replace `x' = subinstr(`x', " y ", "", .)
* remove spaces
gen `x'_nospace = subinstr(`x', " ","", .)
sort `x'_nospace
egen grfp_group_`y'_clean = group(`x')
egen grfp_group_`y'_clns = group(`x'_nospace)
}
rename grfp_`y'_to_clean grfp_`y'_cleaned
rename grfp_`y'_to_clean_nospace grfp_`y'_clns
egen grfp_group_`y'_original = group(grfp_`y'_clean_full)
lab var grfp_group_`y'_original "Unique `y' group ID, based on full grfp name"
lab var grfp_group_`y'_clean "Unique `y' group ID, based on clean grfp name"
lab var grfp_group_`y'_clns "Unique `y' group ID, based on clean grfp name with no spaces"
br grfp_`y'_clean_full grfp_`y'_cl*
sum grfp_group_`y'*
*Proposed Institution* 1398 original groups, 1055 cleaned groups, 974 cleaned & nospace **
*BAC* 2682 original groups, 1893 cleaned groups, 1789 cleaned & nospace **
*CUR* 972 original groups, 943 cleaned groups, 895 cleaned & nospace **
* tie-break on the original row id so the counter below is reproducible
sort grfp_group_`y'_clns unique_counter_grfp
by grfp_group_`y'_clns: gen grfp_`y'_clns_counter = _n
by grfp_group_`y'_clns: gen grfp_`y'_clns_tally = _N
lab var grfp_`y'_clns_tally "number of obs within university, derived from grfp `y' clean no space data"
gen `y'_clns_to_match = grfp_`y'_clns
** bac_clns_to_match & cur_clns_to_match: MATCH WITH IPEDS **
}

** Sanity checks: summarize the counters, tallies and group IDs built above
** (current institution first, then baccalaureate, then proposed institution)
foreach inst in cur bac {
	summarize grfp_`inst'_clns_counter grfp_`inst'_clns_tally
	summarize grfp_group_`inst'_*
}
summarize grfp_group_original grfp_group_clean grfp_group_clns
********************************************************************************
** Write the cleaned GRFP list to disk (input to STEP 3)
save "$dir/GRFP/GRFP list clean round2.dta", replace
********************************************************************************
}
***** STEP 2: IPEDS: Clean up university string
{
* Load the IPEDS institution universe, standardize variable names, and keep one
* record per IPEDS unit ID.
clear all 
global dir "<set according to user>"
use "$dir/IPEDS/IPEDS_FullUniverse_InstitutionUnitIDs_1987-2010.dta"
* Row identifier that records the original file order; used below as a
* deterministic tie-breaker and later to restore the original order.
gen unique_counter_ipeds = _n
sort unique_counter_ipeds

rename unitid ipeds_id
lab var ipeds_id "IPEDS Institution Unique ID"
drop unique_list
lab var groupid "Group ID IPEDS"
rename academicyear ipeds_year 
lab var ipeds_year "Year added to IPEDS database"
rename instname ipeds_uni
lab var ipeds_uni "Institution name (IPEDS)"
rename city city_ipeds
lab var city_ipeds "City (IPEDS)"
rename state state_ipeds
lab var state_ipeds "State (IPEDS)"

* assess dups
* dup = 0 for IDs that occur once; 1, 2, ... for the rows of a repeated ID.
* Sorting on unique_counter_ipeds within ipeds_id makes dup == 1 the FIRST
* occurrence in the original file. With a plain "sort ipeds_id" the order of
* tied rows is arbitrary, so the surviving name spelling was not reproducible.
sort ipeds_id unique_counter_ipeds
quietly by ipeds_id: gen dup = cond(_N==1,0,_n)
sum dup
* Original file had 19236 observations; 2129 rows have duplicate ipeds_id
* I glanced at the list of dups and they are mostly professional schools with different institutional names (spellings are slightly off)
drop if dup > 1
* cleaned file has 18128 observations; there are no duplicate iped numbers. There will be duplicate uni_clns_to_match.
drop dup 

** Normalize the IPEDS institution name into a space-free match key.
** The substitution list below is applied strictly in this order and mirrors
** the list used for the GRFP strings in STEP 1 (minus the GRFP-only rules),
** so that both sides of the merge are transformed the same way.
set more off
generate ipeds_uni_clean_full = trim(itrim(lower(ipeds_uni)))
browse ipeds_uni_clean_full

** Split on '/' and keep the first piece, assumed to be the primary name
generate ipeds_uni_clean1 = ipeds_uni_clean_full
split ipeds_uni_clean1, p("/")
label variable ipeds_uni_clean11 "string portion of ipeds_uni_clean1 before '/'"
label variable ipeds_uni_clean12 "string portion of ipeds_uni_clean1 after '/'"
label variable ipeds_uni_clean13 "string portion of ipeds_uni_clean1 after 2nd '/'"

** Working copy that receives the substitutions
generate ipeds_uni_cleaned = ipeds_uni_clean11
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " of ", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " at ", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "coll ", "college", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "col ", "college", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " sci ", "science", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " the ", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " and ", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "&", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "university", "u", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "univ", "u", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "uni", "u", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "institute", "i", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "institution", "i", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "insts", "i", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "inst", "i", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " ins", "i", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "technology", "tech", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "-", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, ",", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "/", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "(", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, ")", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, ".", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "state", "st", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " s ", "st", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "school", "sch", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "schl", "sch", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "centr", "center", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "ctr", "center", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "calif", "california", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "rsrve", "reserve", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "medical", "med", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "medicine", "med", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "ga ", "georgia", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "hlth", "health", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "laboratory", "lab", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "labs", "lab", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "physicians", "phys", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "physician", "phys", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "agriculture", "ag", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "agricultural", "ag", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " sci ", "science", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "sciences", "science", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "int'l", "international", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "intl", "international", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " engineering ", "eng", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " cnty ", "county", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " okla ", "oklahoma", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " va ", "virginia", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, " ill ", "illinois", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "@", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "canada", "", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "polytechnic", "poly", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "polytech", "poly", .)
replace ipeds_uni_cleaned = subinstr(ipeds_uni_cleaned, "saint", "st", .)

** Space-free version of the cleaned name; this is the IPEDS-side match key
generate ipeds_uni_clns = subinstr(ipeds_uni_cleaned, " ","", .)
sort ipeds_uni_clns
browse ipeds_uni_clean_full ipeds_uni_clns
generate uni_clns_to_match = ipeds_uni_clns
** after cleaning up the string, evaluate dups
* dup = 0 for match keys that occur once; 1, 2, ... for rows sharing a key.
* The sort is tie-broken on unique_counter_ipeds so that, among institutions
* collapsing to the same cleaned name, the one that appears first in the
* original IPEDS file is kept. With a plain "sort uni_clns_to_match" the order
* of tied rows is arbitrary, which made the surviving ipeds_id (the ID later
* merged onto GRFP records) non-reproducible.
sort uni_clns_to_match unique_counter_ipeds
quietly by uni_clns_to_match: gen dup = cond(_N==1,0,_n)
sum dup
br uni_clns_to_match dup if dup > 0
/* 3,695 duplicates  are not research institutions.
They are beauty, salon, real estate, scuba, bar-tending and other training institutes
for individuals to gain a professional degree. They are listed multiple times given that they
have multiple locations. */
drop if dup > 1
** 15,679 unique observations in IPEDS based on ipeds ID and string
drop dup
* re-check: after the drop every match key should occur once (dup all zero)
sort uni_clns_to_match
quietly by uni_clns_to_match: gen dup = cond(_N==1,0,_n)
sum dup
lab var uni_clns_to_match "Proposed Institution Match ID (IPEDS & GRFP)"
* Copies of the match key and of the IPEDS ID under the names used for the
* baccalaureate and current institution.
* (The intermediate sorts that used to follow each gen were dropped: the data
* are re-sorted on unique_counter_ipeds immediately before saving.)
gen bac_clns_to_match = uni_clns_to_match
lab var bac_clns_to_match "Baccalaureate Institution Match ID (IPEDS & GRFP)"
gen cur_clns_to_match = uni_clns_to_match
lab var cur_clns_to_match "Current Institution Match ID(IPEDS & GRFP)"
gen ipeds_cur_id = ipeds_id
gen ipeds_bac_id = ipeds_id
* MATCHING VARIABLES * uni_clns_to_match; bac_clns_to_match; cur_clns_to_match **
********************************************************************************
*save "$dir/IPEDS/IPEDS list clean round1.dta",replace
sort unique_counter_ipeds
save "$dir/IPEDS/IPEDS list clean round2.dta",replace
********************************************************************************
}
***** STEP 3: Merge IPEDS list to GRFP Proposed and Current Institution
{
* --- Build a slim IPEDS lookup: IPEDS ID, cleaned match key, original row id ---
clear all
use "$dir/IPEDS/IPEDS list clean round2.dta"
keep ipeds_id uni_clns_to_match unique_counter_ipeds
sort uni_clns_to_match unique_counter_ipeds
save "$dir/IPEDS/IPEDS list clean round_uni.dta", replace
********************************************************************************
* --- Attach the IPEDS ID to each GRFP record via the cleaned proposed-institution key ---
clear all
use "$dir/GRFP/GRFP list clean round2.dta"
sort uni_clns_to_match unique_counter_grfp
browse uni_clns_to_match unique_counter_grfp
merge m:1 uni_clns_to_match using "$dir/IPEDS/IPEDS list clean round_uni.dta"
** PROJECT-level match procedure ** matched 42,959 GRFP projects; need to match 15259 remaining projects
* Discard IPEDS institutions that no GRFP record points to (using-only rows)
keep if _merge != 2
* 15361 observations dropped *
** Matching procedure in DTA file: ipeds # (if match); -99 = foreign; -77 unable to identify institution; -55 unknown
* Hand-match code, filled in by the sections that follow; starts out missing
generate ipeds_handmatch = .
********************************************************************************
** REPLACE -99 FOR FOREIGN INSTITUTIONS: 592 observations
********************************************************************************
** Sets ipeds_handmatch to -99 for every record whose cleaned proposed-institution
** key (uni_clns_to_match) equals one of the hand-identified names listed below.
** Each pair of foreach loops iterates over a single name, so inside the loops
** `x' always expands to ipeds_handmatch and `y' to uni_clns_to_match.
** The list is spread over several replace statements; the statements are
** independent of one another (each only assigns -99).
** The quoted strings are compared exactly against the cleaned, space-free keys
** built in STEP 1 (including artifacts of that cleaning), so they must not be
** "corrected" to the real institution spellings.
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -99 if `y' == "adelaideu"|`y'=="amsterdamuof"|`y'=="aucklandu"|`y'=="australiannationalu"|`y'=="bielefelduof"|`y'=="bordeauxiuof"|`y'=="britishcolumu"|`y'=="cambridgeu"|`y'=="cambridgeucimr"|`y'=="cambridgeuof"|`y'=="chalmersutech"|`y'=="copenhagenu"|`y'=="cranfielditech"|`y'=="delftitech"|`y'=="denmarktechuof"|`y'=="durhamu"|`y'=="ecoleenscienceoc"|`y'=="ecolenationaledugenieruraldeseauxetdesforets"|`y'=="ecolenormalesuperieurelyon"|`y'=="ecolepolyniquefederalelausanne"|`y'=="edinburghuof"|`y'=="eidgenossischetechnischehochschulezurich"|`y'=="fritzhaberimaxplanck"|`y'=="georgaugustuersitaetgoettingen"|`y'=="georgiantechnicalu"|`y'=="glasgowuof"|`y'=="griffithu"|`y'=="heidelberguof"
}
}
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -99 if `y'=="imperialcollege"|`y'=="imperialcollegelondon"|`y'=="imperialcollegescitech"|`y'=="imperialcollegeulondon"|`y'=="indianitechbombay"|`y'=="internationalmaxplanckresearchsch"|`y'=="jamescooku"|`y'=="kentcantburyu"|`y'=="king'scollegelondon"|`y'=="lancasteru"|`y'=="lancasteruof"|`y'=="leibnitziitutfuermeereswissenschaftenanderukiel"|`y'=="leidenu"|`y'=="leipzigschhumanorigins"|`y'=="liegeuof"|`y'=="londonschecon"|`y'=="londonscheconomics"|`y'=="londonscheconomicspoliticalscience"|`y'=="ludwigmaximiliansu"|`y'=="lundu"|`y'=="lunduof"|`y'=="macquarieu"|`y'=="masaryku"|`y'=="mcgillmontreal"|`y'=="mcgillu"|`y'=="mcgillumontreal"|`y'=="mcmasteru"|`y'=="mcmasteruontar"|`y'=="medresearchcouncil"
}
}
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -99 if `y'=="memorialnewfdld"|`y'=="nationalucordoba"|`y'=="nationalusingapore"|`y'=="newbrunswicku"|`y'=="newcastleu"|`y'=="newstwalesu"|`y'=="otagouof"|`y'=="oxfordu"|`y'=="oxforduof"|`y'=="plymouthu"|`y'=="qinghuau"|`y'=="queen'su"|`y'=="queenskingsnon"|`y'=="readinguof"|`y'=="royalhollowayulondon"|`y'=="rwthaachenu"|`y'=="salemteikyou"|`y'=="simonfraseru"|`y'=="southafricauof"|`y'=="standrewsu"|`y'=="stfrnxaviern"|`y'=="swissfederalitecheth"|`y'=="technicaluberlin"|`y'=="technicaluersitaetdresden"|`y'=="technicalumuch"|`y'=="thelondonscheconomicspoliticalscience"|`y'=="theumanchester"|`y'=="torontouof"|`y'=="tubingenuof"|`y'=="tudresden"|`y'=="uaberdeen"|`y'=="uaixmarseilleiii"|`y'=="ualberta"|`y'=="uauckland"|`y'=="ubergen"|`y'=="ubern"|`y'=="ubristol"
}
}
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -99 if `y'=="ubritishcolumbia"|`y'=="ubuenosaires"|`y'=="ubuenosairesschagronomy"|`y'=="ucalgary"|`y'=="ucambridge"|`y'=="ucanterbury"|`y'=="ucapetown"|`y'=="ucollegelondon"|`y'=="ucolllondon"|`y'=="udurham"|`y'=="uedinburgh"|`y'=="uersitedaixmarseille"|`y'=="uexeter"|`y'=="ufederaldoacre"|`y'=="uglasgow"|`y'=="uguelph"|`y'=="uhongkong"|`y'=="uinnsbruck"|`y'=="ulondon"|`y'=="umanitoba"|`y'=="umelbourne"|`y'=="unewcastleupontyne"|`y'=="unewsouthwales"|`y'=="uoxford"|`y'=="uparisvi"|`y'=="uqueensland"|`y'=="usaskatchewan"|`y'=="usheffield"|`y'=="usouthampton"|`y'=="ustandrews"|`y'=="usydney"|`y'=="utoronto"|`y'=="utrechtstuof"|`y'=="uutrecht"|`y'=="uvictoria"|`y'=="uwarwick"|`y'=="uwaterloo"|`y'=="uwesternontario"
}
}
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -99 if `y'=="ulondonking'scollege"|`y'=="uwindsor"|`y'=="uyork"|`y'=="victoriaumanchester"|`y'=="victoriauwellington"|`y'=="walesucollege"|`y'=="waterloouont"|`y'=="windsoruof"|`y'=="yorku"|`y'=="yorkuof"
}
}
** Additional foreign names (presumably added in a later pass -- confirm)
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -99 if `y'=="ubirmingham"|`y'=="technischeuersitaetmuenchen"|`y'=="stfrnxavierns"|`y'=="newyorkuabudhabi"|`y'=="kingabdullahusciencetech"
}
}
********************************************************************************
** REPLACE -77 FOR UNIVERSITIES WITH NO IPEDS: 64 observations
********************************************************************************
** Sets ipeds_handmatch to -77 for records whose cleaned proposed-institution key
** (uni_clns_to_match) equals one of the names below. As in the -99 section, the
** foreach loops each run over a single name: `x' is ipeds_handmatch and `y' is
** uni_clns_to_match.
** NOTE(review): "missouriusciencetech" is flagged -77 here but is also assigned
** IPEDS ID 178411 in the "REPLACE MISSING WITH IPEDS" section that runs later,
** so the later assignment is the one that remains -- confirm this is intended.
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -77 if `y'=="alabamaubirmingham"|`y'=="aberystwythu"|`y'=="americanmuseumnaturalhistory"|`y'=="architeccturalassociation"|`y'=="binghamtonu"|`y'=="collegeforindustrialengrsiqs"|`y'=="costaricauof"|`y'=="factoryphysicsinc"|`y'=="hawaiieastwestcenter"|`y'=="meddentnjrwjohnsonpsctaway"
}
}
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -77 if `y'=="missouriusciencetech"|`y'=="montereyiforinternationalstd"|`y'=="oregongraduatei"|`y'=="oregongraduateiciencetech"|`y'=="oxfordpoly"|`y'=="proposedgraduatei"|`y'=="schadvancedinternationalstudies"|`y'=="soclstudiesi"|`y'=="southwesttexasstu"|`y'=="texuhealthscihoust"|`y'=="texumedbrgalvstn"|`y'=="texuswstrnmedcenter"|`y'=="umeddentnjpiscataway"|`y'=="usouthwesternlouisiana"|`y'=="virginiaimarinescience"|`y'=="virginiatechassociates"|`y'=="westvirginiaitech"
}
}
** The last group mixes one chained condition with two single-name replaces;
** all three statements assign the same code (-77).
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -77 if `y'=="cityu"|`y'=="lelandstanfordjru"|`y'=="newyorkbotanicalgarden"|`y'=="richardgildergraduatesch"|`y'=="simon'srockbardcollege"|`y'=="memorialsloanketteringcancercenter"|`y'=="gerstnersloanketteringgraduatesch"|`y'=="gerstnersloanketteringgraduateschbiomedscience"|`y'=="gerstnersloanketteringschbiomedscience"|`y'=="sloanketteringiforcancerresearch"
replace `x' = -77 if `y' == "georgiacollege"
replace `x' = -77 if `y' == "uhawaiikapiolanicommutycollege"
}
}
********************************************************************************
** REPLACE -55 FOR NO UNIVERSITY AFFILIATION: 9 observations
********************************************************************************
** Sets ipeds_handmatch to -55 for records whose cleaned proposed-institution key
** (uni_clns_to_match) is a placeholder rather than an institution name.
** The foreach loops each run over a single name: `x' is ipeds_handmatch and
** `y' is uni_clns_to_match.
** Fix: the condition previously contained a doubled "||" between "null" and
** "undecided"; Stata's documented logical OR is the single "|", used here.
** NOTE(review): the final comparison with "." cannot match a cleaned key,
** because the STEP 1 cleaning removes every "." from the string; it is kept
** unchanged. If blank institutions are meant to receive -55, a comparison
** with "" would be needed -- confirm against the data.
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
replace `x' = -55 if `y'=="don'tknowyetwillfindoutinmarch"|`y'=="applyingto:ucbucsbstanfordcaltech"|`y'=="notyetknown"|`y'=="null"|`y'=="undecided"|`y'=="unknowncurrentlyapplying"|`y'=="."
}
}
********************************************************************************
** REPLACE MISSING WITH IPEDS: 14241 observations
********************************************************************************
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
set more off
replace `x' = 178402 if `y' == "umissourikansascity"
replace `x' = 178411 if `y' == "umissourirolla"|`y' == "missouriusciencetech"|`y' == "missouriurolla"
replace `x' = 178420 if `y' == "umissouristlouis"
replace `x' = 178396 if `y' == "missouriucolumbia"
replace `x' = 103112 if `y' == "alaskauof"|`y'=="ualaska"|`y'=="ualaskaanchorage"
replace `x' = 210562 if `y' == "alberteieincollegemed"
replace `x' = 201007 if `y' == "antiochnewenglandgradsch"
replace `x' = 104151 if `y' == "arizonastuphoenix"|`y'=="larizonastu"
replace `x' = 104179 if `y' == "arizonauof"|`y'=="uarizonadeptagbiosystemsengineering"
replace `x' = 106458 if `y' == "arkansasstu"
replace `x' = 104151 if `y' == "arizonastu"
replace `x' = 100858 if `y' == "auburnu"
replace `x' = 164988 if `y' == "bostonugraduatesch"|`y'=="bostonucharlesrivercampus"|`y'=="bostonueristy"|`y'=="bostonugraduateschartsscience"|`y'=="bostonuschmed"|`y'=="trusteesbostonu"
replace `x' = 201441 if `y' == "bowlinggreenstu"
replace `x' = 190567 if `y' == "brooklyncollege"|`y'=="cityunewyork"|`y'=="cunygraduateschucenter"|`y'=="cunyhuntercollege"|`y'=="cunymtsinaischmed"|`y'=="cunyqueenscollege"|`y'=="huntercollegecuny"|`y'=="thegraduatecentercuny"
replace `x' = 110714 if `y' == "californiacruzlick"
replace `x' = 110422 if `y' == "californiaorniapolystufoundation"|`y'=="californiaorniapolystupomona"|`y'=="californiaorniapolystusanluisobi"
replace `x' = 112765 if `y' == "californiaorniastufullertonfoundation"|`y'=="californiaorniastuhayward"
replace `x' = 110635 if `y' == "ucberkeleyucsfjointbioengineeringprogram"|`y'=="ucberkeleyucsanfranciscojointbioengineeringprogram"|`y'=="ucberkeley"|`y'=="ucb"|`y'=="californiauberkeley"|`y'=="gyucaliforniaorniaberkeley"|`y'=="theucaliforniaorniaberkeley"|`y'=="ucaliforniaornia"|`y'=="ucaliforniaorniaberkeleyucsanfranciscojointpr"
replace `x' = 110644 if `y' == "californiaudavis"|`y'=="eucaliforniaorniadavis"|`y'=="theucaliforniaorniadavis"
replace `x' = 110653 if `y' == "californiauirvine"
replace `x' = 110662 if `y' == "californiaulosangeles"|`y'=="ucaliforniaorniaa"|`y'=="uclahighereducationresearchi"
replace `x' = 110671 if `y' == "californiauriverside"|`y'=="ucaliforniaorniariversidefoundation"
replace `x' = 110680 if `y' == "californiausandiego"|`y'=="gyucaliforniaorniasandiego"|`y'=="oucaliforniaorniasandiego"|`y'=="scrippsioceanographyucaliforniaorniasandiego"|`y'=="scrippsioceanographyucsandiego"|`y'=="scrippsioceanographyucsd"|`y'=="scrippsioceanography"|`y'=="theucaliforniaorniasandiego"|`y'=="ucaliforniaorniasandiegorevellecollege"|`y'=="ucaliforniaorniasandiegoschmed"|`y'=="ucaliforniaorniasandiegoscrippsiocean"|`y'=="ucaliforniaorniasandiegoscrippsioceanography"|`y'=="ucaliforniaorniasandiegosio"
replace `x' = 110705 if `y' == "californiausantabarb"|`y'=="ucaliforniaorniasantabarb"
replace `x' = 110714 if `y' == "californiausantacruz"|`y'=="theucaliforniaorniasantacruz"
replace `x' = 110404 if `y' == "calitech"|`y'=="ecaliforniaorniaitech"|`y'=="gycaliforniaorniaitech"|`y'=="californiaorniastpolyu"
replace `x' = 110699 if `y' == "calusanfrancisco"|`y'=="gyucaliforniaorniasanfrancisco"|`y'=="ucaliforniaorniacenterhealthscila"|`y'=="ucaliforniaorniamedcentsanfran"|`y'=="ucsf"
replace `x' = 173258 if `y' == "carletonu"
replace `x' = 211440 if `y' == "carnegiemellonitech"|`y'=="ucarnegiemellonu"
replace `x' = 201645 if `y' == "casewestrnreserve"
replace `x' = 144050 if `y' == "chicagouof"|`y'=="uchicagopritzkerschmed"
replace `x' = 153603 if `y' == "chiowastu"
replace `x' = 206260 if `y' == "cincinnatiuof"|`y'=="ucincinnati"|`y'=="ucincinnaticollegemed"
replace `x' = 112251 if `y' == "claremontgraduatesch"|`y'=="ranchosantaanabotanicgardenclaremontgraduateu"
replace `x' = 436377 if `y' == "coldspringharborlab"
replace `x' = 231624 if `y' == "collegewilliammaryvirginiaimarinescience"|`y'=="williammaryc"
replace `x' = 126614 if `y' == "coloradouboulder"|`y'=="ucolorado"|`y'=="uucolorado"
replace `x' = 126562 if `y' == "coloudenvercenter"|`y'=="ucoloradodenverhealthsciencecenter"
replace `x' = 190150 if `y' == "columbaphyssurg"|`y'=="columbiabarnard"|`y'=="columbiau"|`y'=="columbiaucollegephysurg"|`y'=="columbiaucolphyssurgeons"|`y'=="columbiaucphys"|`y'=="columbiaulamontdohertyearthobservatory"|`y'=="columbiaupresbymedcenter"|`y'=="columbiauteacherscollege"|`y'=="teacherscollegecolumbiau"
replace `x' = 129020 if `y' == "uconnecticutmarinesciencei"|`y'=="connecticutuof"|`y'=="connuhealthcenter"|`y'=="uconnecticutaverypoint"|`y'=="uconnecticuthealthcenter"|`y'=="uconnecticutstorrs"
replace `x' = 190406 if `y' == "cornell"|`y'=="ecornellu"|`y'=="gycornellu"
replace `x' = 190424 if `y' == "weillcornellmedsch"|`y'=="weillgraduatecollegemedsciencecornellu"|`y'=="weillgraduateschmedsciencecornellu"|`y'=="cornelluweillcornellmedcollege"|`y'=="cornellweillgraduateschmedscience"|`y'=="joansanfordiweillmedcollegecornellu"
replace `x' = 190433 if `y' == "cornelluendowed"|`y'=="cornellumedcampus"|`y'=="cornellumedcol"|`y'=="cornellust"
replace `x' = 193900 if `y' == "courantimathscience"
replace `x' = 182670 if `y' == "dartmouthcollegethayerschengineering"|`y'=="dartmouthmedsch"
replace `x' = 130943 if `y' == "delawareuof"|`y'=="udelawarecollegemarinestudies"
replace `x' = 198419 if `y' == "dukeumedcenter"|`y'=="dukeuschmed"
replace `x' = 166027 if `y' == "harvardcollegepresidentfellowsof"|`y'=="eharvardu"|`y'=="gyharvardu"|`y'=="gyharvardumedsch"|`y'=="harvardmassachusettsitech"|`y'=="harvardmedsch"|`y'=="harvardmitdivhealthsci"|`y'=="harvardmitdivisionhealthsciencetech"|`y'=="harvardmitprog"|`y'=="harvardmitproghealthsci"|`y'=="harvardumedsch"|`y'=="harvarduschpublichealth"|`y'=="harvarduunk"|`y'=="lharvardu"|`y'=="paharvardu"|`y'=="sharvardu"
replace `x' = 139658 if `y' == "emoryuyerkesregionalprimateresearchcenter"
replace `x' = 240444 if `y' == "euwisconsinmadison"
replace `x' = 221999 if `y' == "evanderbiltu"|`y'=="vanderbiltuschmed"|`y'=="vanderbiltumedcenter"
replace `x' = 133650 if `y' == "floridaagmechu"
replace `x' = 134130 if `y' == "floridauof"|`y'=="gyuflorida"|`y'=="ufloridaifoodagric"
replace `x' = 139755 if `y' == "geogiaitech"|`y'=="georgiaiitutetech"|`y'=="georgiaitech"|`y'=="georgiaitechgeorgiatech"|`y'=="georgiatechresearchcorporation"|`y'=="georgiatechresearchcorporationgeorgiaitech"|`y'=="gygeorgiaitech"|`y'=="jointgeorgiaitech"|`y'=="medcollegegeorgiaresearchiinc"|`y'=="thegeorgiaitech"|`y'=="ugeorgiaitech"
replace `x' = 139959 if `y' == "georgiauof"
replace `x' = 131469 if `y' == "geowashingtonu"|`y'=="georgewashingtonuthe"
replace `x' = 166683 if `y' == "gymassachusettsitech"|`y'=="lmassachusettsitech"|`y'=="massachussettsitech"|`y'=="massitech"|`y'=="mit"|`y'=="omassachusettsitech"|`y'=="umassachusettsitech"
replace `x' = 186131 if `y' == "gyprincetonu"
replace `x' = 243744 if `y' == "ystanfordu"|`y'=="gystanfordu"|`y'=="lstanfordu"|`y'=="ostanfordu"|`y'=="stanfordfdresi"|`y'=="stanfordschmed"|`y'=="stanforduhopkinsmarinestation"|`y'=="stanforduscheng"|`y'=="stanforduschmed"
replace `x' = 104179 if `y' == "gyuarizona"
replace `x' = 174066 if `y' == "gyuminnesotaminneapolis"|`y'=="minnesotauminneapl"|`y'=="uminnesota"|`y'=="uminnesotaitech"|`y'=="uminnesotaminneapolis"
replace `x' = 174066 if `y' == "uminnesotatwincit"
replace `x' = 215062 if `y' == "gyupennsylvania"|`y'=="pennsylvaniauof"
replace `x' = 236948 if `y' == "gyuwashington"|`y'=="uwgraduatesch"
replace `x' = 130794 if `y' == "gyyaleu"|`y'=="yaleuschforestryenvironmentalstudies"|`y'=="yaleuschmed"|`y'=="yaleuyaleschmed"
replace `x' = 141963 if `y' == "hawaiiuof"|`y'=="hawaiiusystemoff"|`y'=="uhawaii"|`y'=="uhawaiihonolulucc"
replace `x' = 225511 if `y' == "houstonuof"
replace `x' = 115755 if `y' == "humboldtstufoundation"
replace `x' = 142285 if `y' == "idahouof"
replace `x' = 145813 if `y' == "illinoisstunormal"|`y'=="illinoisuunknown"
replace `x' = 145637 if `y' == "illuurbanachamp"|`y'=="luillinoisurbanachampaign"|`y'=="uillinoisurbanachampaign"
replace `x' = 151351 if `y' == "indianau"|`y'=="indianaubloomngton"
replace `x' = 151102 if `y' == "indianaupurduefortwayne"
replace `x' = 151111 if `y' == "indianauschmed"
replace `x' = 153658 if `y' == "iowauof"|`y'=="uiowacollegemed"
replace `x' = 162928 if `y' == "johnshopkinsbloombergschpublichealth"|`y'=="jhopkinsmedi"|`y'=="johnshopkinsmedi"|`y'=="johnshopkinsschmed"|`y'=="johnshopkinsukriegerschartsscience"|`y'=="johnshopkinsumedi"|`y'=="johnshopkinsupeabodyi"|`y'=="johnshopkinsuschhygienepublichealth"|`y'=="johnshopkinsuschmed"
replace `x' = 155256 if `y' == "kansasuof"|`y'=="ukansasmaincampus"
replace `x' = 203517 if `y' == "kentstu"
replace `x' = 157085 if `y' == "kentuckyuof"|`y'=="ukentuckylexington"|`y'=="ukentuckyresearchfoundation"
replace `x' = 165334 if `y' == "lclarku"
replace `x' = 146676 if `y' == "lincolnc"
replace `x' = 159391 if `y' == "louisianastuamcollege"
replace `x' = 159373 if `y' == "louisianastuschmedneworl"
replace `x' = 435000 if `y' == "louisianastuschmedshrevep"
replace `x' = 159647 if `y' == "louisianatechunversity"
replace `x' = 157289 if `y' == "louisvilleuof"|`y'=="ulouisvilleresearchfoundationinc"|`y'=="ulouisvilleschmed"
replace `x' = 141574 if `y' == "hawaiiimarinebiologyuhawaiimanoa"
replace `x' = 435338 if `y' == "lscrippsresearchi"
replace `x' = 170976 if `y' == "lumichigan"|`y'=="michiganuof"|`y'=="umichigan"|`y'=="umichiganmedsch"
replace `x' = 161253 if `y' == "maineuorono"|`y'=="umaineorono"
replace `x' = 163268 if `y' == "marylandubaltcnty"
replace `x' = 163286 if `y' == "marylanduof"|`y'=="umaryland"
replace `x' = 232681 if `y' == "marywashingtoncollege"
replace `x' = 166629 if `y' == "massuofamherst"|`y'=="umassachusettsamherst"|`y'=="umassachusettsmedsch"
replace `x' = 167987 if `y' == "umassachusettsdartmouth"
replace `x' = 166638 if `y' == "umassachusettsharborcampus"
replace `x' = 166513 if `y' == "umassachusettslowell"
replace `x' = 166708 if `y' == "umassachusettsworcester"
replace `x' = 173948 if `y' == "mayoclinicrochester"|`y'=="mayograduatesch"
replace `x' = 204024 if `y' == "miamiu"|`y'=="miamiuoxfordcampus"
replace `x' = 135726 if `y' == "miamiuof"|`y'=="umiamirosenstielschmarineatmosphericsci"|`y'=="umiamischmed"
replace `x' = 171128 if `y' == "michigantechu"
replace `x' = 171137 if `y' == "michiganudearborn"
replace `x' = 180489 if `y' == "montanauof"|`y'=="umontana"|`y'=="umontanawestern"
replace `x' = 199102 if `y' == "ncagtechstu"|`y'=="northcarolinaagtechnicalstu"
replace `x' = 199193 if `y' == "ncsturaleigh"|`y'=="northcarolinastu"
replace `x' = 199120 if `y' == "ncuofchapelhill"|`y'=="unorthcarolina"|`y'=="theunorthcarolinachapelhill"
replace `x' = 199218 if `y' == "ncuofwilmington"
replace `x' = 181464 if `y' == "nebraskaulincoln"
replace `x' = 181428 if `y' == "nebraskaumedcenter"
replace `x' = 182290 if `y' == "nevadauof"|`y'=="unevada"|`y'=="unevadarenofleischmannplanetarium"
replace `x' = 183044 if `y' == "newhampshireuof"|`y'=="unewhampshire"|`y'=="unewhampshiredurham"
replace `x' = 188012 if `y' == "newmexicostu"
replace `x' = 187985 if `y' == "newmexicouof"|`y'=="unewmexico"
replace `x' = 193654 if `y' == "newschforsocialresearch"|`y'=="newschu"
replace `x' = 194152 if `y' == "newyorkstcollegeceramicsalfredu"
replace `x' = 193900 if `y' == "newyorkucourantimathematicalscience"|`y'=="newyorkumedcenter"|`y'=="newyorkuschmed"
replace `x' = 187967 if `y' == "nmeximiningtech"
replace `x' = 200332 if `y' == "northdakotastu"|`y'=="northdakotastufargo"
replace `x' = 147749 if `y' == "northwesternuchicagocampus"|`y'=="northwesternuchicago"
replace `x' = 152080 if `y' == "notredameuof"
replace `x' = 136215 if `y' == "novaseuoceancenter"
replace `x' = 204796 if `y' == "ohiostu"|`y'=="ohiosturesearchfoundation"|`y'=="theohiostu"
replace `x' = 204857 if `y' == "ohiou"
replace `x' = 207388 if `y' == "oklahomastu"
replace `x' = 207430 if `y' == "oklahomauof"|`y'=="oklauhealthsci"|`y'=="uoklahoma"
replace `x' = 209551 if `y' == "oregonuof"|`y'=="uoregoneugene"
replace `x' = 228778 if `y' == "outexasaustin"|`y'=="texasuaustin"|`y'=="utexasaustin"
replace `x' = 214777 if `y' == "pennsylvaniastu"|`y'=="thepennsylvaniastu"|`y'=="pennsylvaniastuupark"|`y'=="pennstcollegemed"|`y'=="pennstuupark"|`y'=="pennstuhershymed"|`y'=="pennstuhersheymedcntr"|`y'=="pennstugreatvalley"|`y'=="pennstucapitalcollege"|`y'=="pennstu"|`y'=="pennsylvaniastucommonwealthcollege"|`y'=="schreyerhonorscollegepennsylvaniastu"
replace `x' = 214227 if `y' == "pennsylvaniastberkslehighvalleycollege"
replace `x' = 215293 if `y' == "pittsburghuof"|`y'=="upittsburgh"
replace `x' = 194541 if `y' == "polyunewyork"
replace `x' = 152266 if `y' == "purdueunorthcenteral"
replace `x' = 243780 if `y' == "purdueuscheng"
replace `x' = 217484 if `y' == "rhodeislanduof"|`y'=="urhodeislandschoceanograph"
replace `x' = 195030 if `y' == "rochesteruof"|`y'=="urochestermedsch"
replace `x' = 186371 if `y' == "rutgersstunewark"|`y'=="rutgersstunewbrunswick"|`y'=="rutgersu"|`y'=="rutgersunewark"|`y'=="rutgersunewbrunswick"
replace `x' = 122436 if `y' == "sandiegouof"
replace `x' = 122409 if `y' == "sandiegostufoundation"|`y'=="sandiegostuucaliforniaorniadavis"
replace `x' = 435338 if `y' == "lscrippsresearchi"|`y'=="thescrippsresearchi"|`y'=="scrippsresi"|`y'=="thescrippsresearchiflorida"|`y'=="thescrippsresearchikelloggschsciencetech"
replace `x' = 219347 if `y' == "sdschminestech"
replace `x' = 218663 if `y' == "southcarolinauof"|`y'=="usouthcarolina"|`y'=="usouthcarolinaschmed"
replace `x' = 123961 if `y' == "southerncaliforniauof"|`y'=="usoutherncallosangeles"
replace `x' = 149222 if `y' == "southernillinoisu"
replace `x' = 228246 if `y' == "sthrnmethodistu"
replace `x' = 179159 if `y' == "stlouisu"
replace `x' = 196255 if `y' == "stunewyork"|`y'=="theuthestnewyork"
replace `x' = 196130 if `y' == "sunybuffalo"
replace `x' = 196282 if `y' == "stunystonybrook"|`y'=="sunystbrkhthsci"|`y'=="sunystonybrook"
replace `x' = 196103 if `y' == "sunyenvironscienceforestry"
replace `x' = 196307 if `y' == "sunyupstmedu"
replace `x' = 221759 if `y' == "tennuknoxville"
replace `x' = 228723 if `y' == "texasamumaincampus"|`y'=="ustexasamu"
replace `x' = 229090 if `y' == "texasudallas"|`y'=="texasuelpaso"
replace `x' = 195049 if `y' == "therockefelleru"
replace `x' = 199139 if `y' == "theunorthcarolinacharlotte"
replace `x' = 221759 if `y' == "theutennesseememphishealthsciencecenter"
replace `x' = 216366 if `y' == "thomasjeffrsonu"
replace `x' = 168148 if `y' == "tuftsuschmed"
replace `x' = 160755 if `y' == "tulaneu"|`y'=="tulaneula"|`y'=="tulaneuschmed"
replace `x' = 200800 if `y' == "uakron"
replace `x' = 100733 if `y' == "ualabama"|`y'=="ualabamatuscaloosa"
replace `x' = 100706 if `y' == "ualabamahuntsville"|`y'=="theualabamainhuntsville"
replace `x' = 102614 if `y' == "ualaskafairbankscampus"|`y'=="ualaskafairbanksjuneaucampus"
replace `x' = 106397 if `y' == "uarkansasfayetteville"|`y'=="uarkansasmedscience"|`y'=="uarkansasmedsciencecampus"
replace `x' = 445188 if `y' == "ucmerced"
replace `x' = 145600 if `y' == "uillinois"
replace `x' = 176017 if `y' == "umississippi"
replace `x' = 181428 if `y' == "unebraskaomaha"
replace `x' = 200280 if `y' == "unorthdakotamaincampus"
replace `x' = 215062 if `y' == "upaschmed"|`y'=="upennsylvaniaschmed"|`y'=="upennsylvaniawhartonschfinancecom"
replace `x' = 251154 if `y' == "upuertoricomayaguez"|`y'=="prumayaguez"|`y'=="upuertoricoriopiedras"|`y'=="upuertoricomedsci"
replace `x' = 181002 if `y' == "uscreightonu"
replace `x' = 448840 if `y' == "usouthfloridastpetersburg"
replace `x' = 230764 if `y' == "utahuof"
replace `x' = 221759 if `y' == "utennesseehealthsciencecenter"|`y'=="utennesseeknoxville"|`y'=="utennesseespacei"
replace `x' = 229090 if `y' == "utexasarlington"|`y'=="utexasdallas"|`y'=="utexaselpaso"|`y'=="utexasmedbrchgalveston"
replace `x' = 416801 if `y' == "utexascancercenterhouston"|`y'=="utexashealthsciencecenterhouston"|`y'=="utexashealthscihouston"|`y'=="utexashealthsciencecentersanantonio"|`y'=="utexashealthscisanantonio"|`y'=="utexassanantonio"|`y'=="utexassouthwesternmedcenter"|`y'=="utexassouthwesternmedcenterdallas"|`y'=="utsouthwesternmedcenter"
replace `x' = 231174 if `y' == "uvermontburlington"|`y'=="uvermontstagcollege"|`y'=="vermontuof"
replace `x' = 234076 if `y' == "uvirginia"|`y'=="uvirginiacharlottesville"|`y'=="virginiauof"
replace `x' = 233921 if `y' == "uvirginiapolyistu"|`y'=="vapolyistu"|`y'=="vapolyitateu"|`y'=="virginiatech"|`y'=="virginiapolyitateun"
replace `x' = 236948 if `y' == "uwashington"|`y'=="uwashingtonschmed"|`y'=="washingtonuof"
replace `x' = 240055 if `y' == "uwisconsin"|`y'=="uwisconsinschmed"|`y'=="wisconsinumadison"
replace `x' = 234030 if `y' == "virginiacommonwealthumedcoll"
replace `x' = 199847 if `y' == "wakeforestubowmangrayschmed"
replace `x' = 236939 if `y' == "washingtonstuspokane"|`y'=="washingtonstuvancouver"
replace `x' = 179867 if `y' == "washingtonuinstlouisschmed"|`y'=="washingtonu"|`y'=="washingtonuschmed"
replace `x' = 238032 if `y' == "westvirginiauresearchcorporation"
replace `x' = 227757 if `y' == "williammarshriceu"
replace `x' = 166610 if `y' == "woodsholeoceanographic"|`y'=="woodsholeoceanographici"|`y'=="woodsholeoceanographici+mitjointprogram"|`y'=="woodsholeoceanographicibiologicaloceanography"|`y'=="woodsholeocni"
replace `x' = 206598 if `y' == "wrightstu"
replace `x' = 240727 if `y' == "wyominguof"
replace `x' = 197708 if `y' == "yeshivauaeieincollegemed"
replace `x' = 219471 if `y' == "usouthdakotamaincampus"
replace `x' = 194310 if `y' == "paceu"
replace `x' = 213020 if `y' == "indianaupennsylvania"
replace `x' = 206589 if `y' == "collegewooster"
replace `x' = 110422 if `y' == "californiaorniapolystu"
replace `x' = 206084 if `y' == "utoledo"
replace `x' = 121628 if `y' == "pardeerandgraduatesch"
}
}
********************************************************************************
** IN TOTAL REPLACED 14906 observations with hand match procedure
********************************************************************************
* Final IPEDS ID for the proposed institution: start from ipeds_id and let any
* non-missing hand-matched value (ipeds_handmatch) take precedence.
generate double ipeds = ipeds_id
replace ipeds = ipeds_handmatch if ipeds_handmatch != .
label variable ipeds "IPEDS ID, final match (for proposed institution)"
* First tally: every assigned ID, including the negative codes (-99, -77, -55).
count if ipeds != .
* Second tally: positive IDs only. The explicit non-missing test is required
* because Stata treats missing as larger than any number.
count if ipeds != . & ipeds > 0
save "$dir/GRFP/GRFP list with uni IPEDS.dta", replace
** NOTES: 58218 observations
** Match IPEDS (including -99, -77, -55) for ~ 57804 observations: 99.3 % Match Rate
** Match IPEDS (excluding -99, -77, -55) for ~ 57139 observations: 98.1 % Match Rate

****************************************************************************************************************************************************************
********************************************************************************
** Merge IPEDS list to GRFP Current Institution
********************************************************************************
* Build the lookup file used on the "using" side of the merge below: only the
* cleaned current-institution key and its IPEDS ID are kept.
* The key has to be unique in this file for the m:1 merge to run.
clear all
use "$dir/IPEDS/IPEDS list clean round2.dta"
keep cur_clns_to_match ipeds_cur_id
sort cur_clns_to_match
save "$dir/IPEDS/IPEDS list clean round_cur.dta", replace
********************************************************************************
* Reload the GRFP list and attach ipeds_cur_id by the cleaned current-institution key.
clear all
use "$dir/GRFP/GRFP list with uni IPEDS.dta"
* merge creates a new _merge variable, so the one already in the file
* (presumably left by the proposed-institution merge) is renamed first.
rename _merge _merge_uni
sort cur_clns_to_match
merge m:1 cur_clns_to_match using "$dir/IPEDS/IPEDS list clean round_cur.dta"
/** PROJECT-level match procedure **
	matched 21911 GRFP projects;
	36307 not matched in master
		27727 have no current institution (based on sorting current_uni)
		need to match 8580 remaining projects (= 36307 - 27727)
	30491 possible current institutions to match on (= 58218 - 27727)
**/
* Discard lookup rows that matched no GRFP record (using-only observations).
drop if _merge == 2
** Matching procedure in DTA file: ipeds # (if match); -99 = foreign; -77 unable to identify institution; -55 unknown
* Hand-match result for the current institution; filled in by the replace steps that follow.
generate ipeds_handmatch_cur = .
********************************************************************************
** REPLACE -99 FOR FOREIGN INSTITUTIONS: 270 observations
********************************************************************************
* Each replace below sets ipeds_handmatch_cur to -99 when the cleaned
* current-institution string equals one of the listed foreign institutions.
* The lists are split over six commands, one condition per continuation line.

* Group 1: adelaideu - heidelberguof
replace ipeds_handmatch_cur = -99 if ///
      cur_clns_to_match == "adelaideu" ///
    | cur_clns_to_match == "amsterdamuof" ///
    | cur_clns_to_match == "aucklandu" ///
    | cur_clns_to_match == "australiannationalu" ///
    | cur_clns_to_match == "bielefelduof" ///
    | cur_clns_to_match == "bordeauxiuof" ///
    | cur_clns_to_match == "britishcolumu" ///
    | cur_clns_to_match == "cambridgeu" ///
    | cur_clns_to_match == "cambridgeucimr" ///
    | cur_clns_to_match == "cambridgeuof" ///
    | cur_clns_to_match == "chalmersutech" ///
    | cur_clns_to_match == "copenhagenu" ///
    | cur_clns_to_match == "cranfielditech" ///
    | cur_clns_to_match == "delftitech" ///
    | cur_clns_to_match == "denmarktechuof" ///
    | cur_clns_to_match == "durhamu" ///
    | cur_clns_to_match == "ecoleenscienceoc" ///
    | cur_clns_to_match == "ecolenationaledugenieruraldeseauxetdesforets" ///
    | cur_clns_to_match == "ecolenormalesuperieurelyon" ///
    | cur_clns_to_match == "ecolepolyniquefederalelausanne" ///
    | cur_clns_to_match == "edinburghuof" ///
    | cur_clns_to_match == "eidgenossischetechnischehochschulezurich" ///
    | cur_clns_to_match == "fritzhaberimaxplanck" ///
    | cur_clns_to_match == "georgaugustuersitaetgoettingen" ///
    | cur_clns_to_match == "georgiantechnicalu" ///
    | cur_clns_to_match == "glasgowuof" ///
    | cur_clns_to_match == "griffithu" ///
    | cur_clns_to_match == "heidelberguof"

* Group 2: imperialcollege - medresearchcouncil
replace ipeds_handmatch_cur = -99 if ///
      cur_clns_to_match == "imperialcollege" ///
    | cur_clns_to_match == "imperialcollegelondon" ///
    | cur_clns_to_match == "imperialcollegescitech" ///
    | cur_clns_to_match == "imperialcollegeulondon" ///
    | cur_clns_to_match == "indianitechbombay" ///
    | cur_clns_to_match == "internationalmaxplanckresearchsch" ///
    | cur_clns_to_match == "jamescooku" ///
    | cur_clns_to_match == "kentcantburyu" ///
    | cur_clns_to_match == "king'scollegelondon" ///
    | cur_clns_to_match == "lancasteru" ///
    | cur_clns_to_match == "lancasteruof" ///
    | cur_clns_to_match == "leibnitziitutfuermeereswissenschaftenanderukiel" ///
    | cur_clns_to_match == "leidenu" ///
    | cur_clns_to_match == "leipzigschhumanorigins" ///
    | cur_clns_to_match == "liegeuof" ///
    | cur_clns_to_match == "londonschecon" ///
    | cur_clns_to_match == "londonscheconomics" ///
    | cur_clns_to_match == "londonscheconomicspoliticalscience" ///
    | cur_clns_to_match == "ludwigmaximiliansu" ///
    | cur_clns_to_match == "lundu" ///
    | cur_clns_to_match == "lunduof" ///
    | cur_clns_to_match == "macquarieu" ///
    | cur_clns_to_match == "masaryku" ///
    | cur_clns_to_match == "mcgillmontreal" ///
    | cur_clns_to_match == "mcgillu" ///
    | cur_clns_to_match == "mcgillumontreal" ///
    | cur_clns_to_match == "mcmasteru" ///
    | cur_clns_to_match == "mcmasteruontar" ///
    | cur_clns_to_match == "medresearchcouncil"

* Group 3: memorialnewfdld - ubristol
replace ipeds_handmatch_cur = -99 if ///
      cur_clns_to_match == "memorialnewfdld" ///
    | cur_clns_to_match == "nationalucordoba" ///
    | cur_clns_to_match == "nationalusingapore" ///
    | cur_clns_to_match == "newbrunswicku" ///
    | cur_clns_to_match == "newcastleu" ///
    | cur_clns_to_match == "newstwalesu" ///
    | cur_clns_to_match == "otagouof" ///
    | cur_clns_to_match == "oxfordu" ///
    | cur_clns_to_match == "oxforduof" ///
    | cur_clns_to_match == "plymouthu" ///
    | cur_clns_to_match == "qinghuau" ///
    | cur_clns_to_match == "queen'su" ///
    | cur_clns_to_match == "queenskingsnon" ///
    | cur_clns_to_match == "readinguof" ///
    | cur_clns_to_match == "royalhollowayulondon" ///
    | cur_clns_to_match == "rwthaachenu" ///
    | cur_clns_to_match == "salemteikyou" ///
    | cur_clns_to_match == "simonfraseru" ///
    | cur_clns_to_match == "southafricauof" ///
    | cur_clns_to_match == "standrewsu" ///
    | cur_clns_to_match == "stfrnxaviern" ///
    | cur_clns_to_match == "swissfederalitecheth" ///
    | cur_clns_to_match == "technicaluberlin" ///
    | cur_clns_to_match == "technicaluersitaetdresden" ///
    | cur_clns_to_match == "technicalumuch" ///
    | cur_clns_to_match == "thelondonscheconomicspoliticalscience" ///
    | cur_clns_to_match == "theumanchester" ///
    | cur_clns_to_match == "torontouof" ///
    | cur_clns_to_match == "tubingenuof" ///
    | cur_clns_to_match == "tudresden" ///
    | cur_clns_to_match == "uaberdeen" ///
    | cur_clns_to_match == "uaixmarseilleiii" ///
    | cur_clns_to_match == "ualberta" ///
    | cur_clns_to_match == "uauckland" ///
    | cur_clns_to_match == "ubergen" ///
    | cur_clns_to_match == "ubern" ///
    | cur_clns_to_match == "ubristol"

* Group 4: ubritishcolumbia - uwesternontario
replace ipeds_handmatch_cur = -99 if ///
      cur_clns_to_match == "ubritishcolumbia" ///
    | cur_clns_to_match == "ubuenosaires" ///
    | cur_clns_to_match == "ubuenosairesschagronomy" ///
    | cur_clns_to_match == "ucalgary" ///
    | cur_clns_to_match == "ucambridge" ///
    | cur_clns_to_match == "ucanterbury" ///
    | cur_clns_to_match == "ucapetown" ///
    | cur_clns_to_match == "ucollegelondon" ///
    | cur_clns_to_match == "ucolllondon" ///
    | cur_clns_to_match == "udurham" ///
    | cur_clns_to_match == "uedinburgh" ///
    | cur_clns_to_match == "uersitedaixmarseille" ///
    | cur_clns_to_match == "uexeter" ///
    | cur_clns_to_match == "ufederaldoacre" ///
    | cur_clns_to_match == "uglasgow" ///
    | cur_clns_to_match == "uguelph" ///
    | cur_clns_to_match == "uhongkong" ///
    | cur_clns_to_match == "uinnsbruck" ///
    | cur_clns_to_match == "ulondon" ///
    | cur_clns_to_match == "umanitoba" ///
    | cur_clns_to_match == "umelbourne" ///
    | cur_clns_to_match == "unewcastleupontyne" ///
    | cur_clns_to_match == "unewsouthwales" ///
    | cur_clns_to_match == "uoxford" ///
    | cur_clns_to_match == "uparisvi" ///
    | cur_clns_to_match == "uqueensland" ///
    | cur_clns_to_match == "usaskatchewan" ///
    | cur_clns_to_match == "usheffield" ///
    | cur_clns_to_match == "usouthampton" ///
    | cur_clns_to_match == "ustandrews" ///
    | cur_clns_to_match == "usydney" ///
    | cur_clns_to_match == "utoronto" ///
    | cur_clns_to_match == "utrechtstuof" ///
    | cur_clns_to_match == "uutrecht" ///
    | cur_clns_to_match == "uvictoria" ///
    | cur_clns_to_match == "uwarwick" ///
    | cur_clns_to_match == "uwaterloo" ///
    | cur_clns_to_match == "uwesternontario"

* Group 5: ulondonking'scollege - yorkuof
replace ipeds_handmatch_cur = -99 if ///
      cur_clns_to_match == "ulondonking'scollege" ///
    | cur_clns_to_match == "uwindsor" ///
    | cur_clns_to_match == "uyork" ///
    | cur_clns_to_match == "victoriaumanchester" ///
    | cur_clns_to_match == "victoriauwellington" ///
    | cur_clns_to_match == "walesucollege" ///
    | cur_clns_to_match == "waterloouont" ///
    | cur_clns_to_match == "windsoruof" ///
    | cur_clns_to_match == "yorku" ///
    | cur_clns_to_match == "yorkuof"

* Group 6: entries listed after the alphabetical groups
replace ipeds_handmatch_cur = -99 if ///
      cur_clns_to_match == "ubirmingham" ///
    | cur_clns_to_match == "technischeuersitaetmuenchen" ///
    | cur_clns_to_match == "stfrnxavierns" ///
    | cur_clns_to_match == "newyorkuabudhabi" ///
    | cur_clns_to_match == "kingabdullahusciencetech"
********************************************************************************
** REPLACE -77 FOR UNIVERSITIES WITH NO IPEDS: 45 observations
********************************************************************************
* Each replace below sets ipeds_handmatch_cur to -77 when the cleaned
* current-institution string equals one of the listed institutions for which
* no IPEDS ID could be identified.
*
* NOTE: "missouriusciencetech" used to be in the second list. It has been
* removed because the later IPEDS hand-match step unconditionally assigns it
* 178411 (together with "umissourirolla" and "missouriurolla"), so the -77 was
* always overwritten. The final data are unchanged; the observation count in
* the header above was recorded with that entry included and may now be lower.

* Group 1
replace ipeds_handmatch_cur = -77 if ///
      cur_clns_to_match == "alabamaubirmingham" ///
    | cur_clns_to_match == "aberystwythu" ///
    | cur_clns_to_match == "americanmuseumnaturalhistory" ///
    | cur_clns_to_match == "architeccturalassociation" ///
    | cur_clns_to_match == "binghamtonu" ///
    | cur_clns_to_match == "collegeforindustrialengrsiqs" ///
    | cur_clns_to_match == "costaricauof" ///
    | cur_clns_to_match == "factoryphysicsinc" ///
    | cur_clns_to_match == "hawaiieastwestcenter" ///
    | cur_clns_to_match == "meddentnjrwjohnsonpsctaway"

* Group 2
replace ipeds_handmatch_cur = -77 if ///
      cur_clns_to_match == "montereyiforinternationalstd" ///
    | cur_clns_to_match == "oregongraduatei" ///
    | cur_clns_to_match == "oregongraduateiciencetech" ///
    | cur_clns_to_match == "oxfordpoly" ///
    | cur_clns_to_match == "proposedgraduatei" ///
    | cur_clns_to_match == "schadvancedinternationalstudies" ///
    | cur_clns_to_match == "soclstudiesi" ///
    | cur_clns_to_match == "southwesttexasstu" ///
    | cur_clns_to_match == "texuhealthscihoust" ///
    | cur_clns_to_match == "texumedbrgalvstn" ///
    | cur_clns_to_match == "texuswstrnmedcenter" ///
    | cur_clns_to_match == "umeddentnjpiscataway" ///
    | cur_clns_to_match == "usouthwesternlouisiana" ///
    | cur_clns_to_match == "virginiaimarinescience" ///
    | cur_clns_to_match == "virginiatechassociates" ///
    | cur_clns_to_match == "westvirginiaitech"

* Group 3
replace ipeds_handmatch_cur = -77 if ///
      cur_clns_to_match == "cityu" ///
    | cur_clns_to_match == "lelandstanfordjru" ///
    | cur_clns_to_match == "newyorkbotanicalgarden" ///
    | cur_clns_to_match == "richardgildergraduatesch" ///
    | cur_clns_to_match == "simon'srockbardcollege" ///
    | cur_clns_to_match == "memorialsloanketteringcancercenter" ///
    | cur_clns_to_match == "gerstnersloanketteringgraduatesch" ///
    | cur_clns_to_match == "gerstnersloanketteringgraduateschbiomedscience" ///
    | cur_clns_to_match == "gerstnersloanketteringschbiomedscience" ///
    | cur_clns_to_match == "sloanketteringiforcancerresearch"

* Single-institution additions
replace ipeds_handmatch_cur = -77 if cur_clns_to_match == "georgiacollege"
replace ipeds_handmatch_cur = -77 if cur_clns_to_match == "uhawaiikapiolanicommutycollege"
********************************************************************************
** REPLACE -55 FOR NO UNIVERSITY AFFILIATION: 0 observations
********************************************************************************
* Sets ipeds_handmatch_cur to -55 when the cleaned current-institution string is
* one of the placeholder entries listed below (undecided / unknown / "null" / ".").
*
* FIX: the original condition chained the comparisons with the logical OR
* operator but had a doubled operator between "null" and "undecided". Stata's
* documented logical OR is a single vertical bar, so the test is rewritten with
* inlist(), which avoids operator chaining altogether. inlist() accepts up to
* 10 arguments when they are strings; 8 are used here.
* The observation count in the header was recorded with the original command
* and should be re-checked after this fix.
replace ipeds_handmatch_cur = -55 if inlist(cur_clns_to_match, ///
    "don'tknowyetwillfindoutinmarch", ///
    "applyingto:ucbucsbstanfordcaltech", ///
    "notyetknown", ///
    "null", ///
    "undecided", ///
    "unknowncurrentlyapplying", ///
    ".")
********************************************************************************
** REPLACE MISSING WITH IPEDS: 8493 observations
********************************************************************************
foreach x in ipeds_handmatch_cur {
foreach y in cur_clns_to_match {
set more off
replace `x' = 178402 if `y' == "umissourikansascity"
replace `x' = 178411 if `y' == "umissourirolla"|`y' == "missouriusciencetech"|`y' == "missouriurolla"
replace `x' = 178420 if `y' == "umissouristlouis"
replace `x' = 178396 if `y' == "missouriucolumbia"
replace `x' = 103112 if `y' == "alaskauof"|`y'=="ualaska"|`y'=="ualaskaanchorage"
replace `x' = 210562 if `y' == "alberteieincollegemed"
replace `x' = 201007 if `y' == "antiochnewenglandgradsch"
replace `x' = 104151 if `y' == "arizonastuphoenix"|`y'=="larizonastu"
replace `x' = 104179 if `y' == "arizonauof"|`y'=="uarizonadeptagbiosystemsengineering"
replace `x' = 106458 if `y' == "arkansasstu"
replace `x' = 104151 if `y' == "arizonastu"
replace `x' = 100858 if `y' == "auburnu"
replace `x' = 164988 if `y' == "bostonugraduatesch"|`y'=="bostonucharlesrivercampus"|`y'=="bostonueristy"|`y'=="bostonugraduateschartsscience"|`y'=="bostonuschmed"|`y'=="trusteesbostonu"
replace `x' = 201441 if `y' == "bowlinggreenstu"
replace `x' = 190567 if `y' == "brooklyncollege"|`y'=="cityunewyork"|`y'=="cunygraduateschucenter"|`y'=="cunyhuntercollege"|`y'=="cunymtsinaischmed"|`y'=="cunyqueenscollege"|`y'=="huntercollegecuny"|`y'=="thegraduatecentercuny"
replace `x' = 110714 if `y' == "californiacruzlick"
replace `x' = 110422 if `y' == "californiaorniapolystufoundation"|`y'=="californiaorniapolystupomona"|`y'=="californiaorniapolystusanluisobi"
replace `x' = 112765 if `y' == "californiaorniastufullertonfoundation"|`y'=="californiaorniastuhayward"
replace `x' = 110635 if `y' == "ucberkeleyucsfjointbioengineeringprogram"|`y'=="ucberkeleyucsanfranciscojointbioengineeringprogram"|`y'=="ucberkeley"|`y'=="ucb"|`y'=="californiauberkeley"|`y'=="gyucaliforniaorniaberkeley"|`y'=="theucaliforniaorniaberkeley"|`y'=="ucaliforniaornia"|`y'=="ucaliforniaorniaberkeleyucsanfranciscojointpr"
replace `x' = 110644 if `y' == "californiaudavis"|`y'=="eucaliforniaorniadavis"|`y'=="theucaliforniaorniadavis"
replace `x' = 110653 if `y' == "californiauirvine"
replace `x' = 110662 if `y' == "californiaulosangeles"|`y'=="ucaliforniaorniaa"|`y'=="uclahighereducationresearchi"
replace `x' = 110671 if `y' == "californiauriverside"|`y'=="ucaliforniaorniariversidefoundation"
replace `x' = 110680 if `y' == "californiausandiego"|`y'=="gyucaliforniaorniasandiego"|`y'=="oucaliforniaorniasandiego"|`y'=="scrippsioceanographyucaliforniaorniasandiego"|`y'=="scrippsioceanographyucsandiego"|`y'=="scrippsioceanographyucsd"|`y'=="scrippsioceanography"|`y'=="theucaliforniaorniasandiego"|`y'=="ucaliforniaorniasandiegorevellecollege"|`y'=="ucaliforniaorniasandiegoschmed"|`y'=="ucaliforniaorniasandiegoscrippsiocean"|`y'=="ucaliforniaorniasandiegoscrippsioceanography"|`y'=="ucaliforniaorniasandiegosio"
replace `x' = 110705 if `y' == "californiausantabarb"|`y'=="ucaliforniaorniasantabarb"
replace `x' = 110714 if `y' == "californiausantacruz"|`y'=="theucaliforniaorniasantacruz"
replace `x' = 110404 if `y' == "calitech"|`y'=="ecaliforniaorniaitech"|`y'=="gycaliforniaorniaitech"|`y'=="californiaorniastpolyu"
replace `x' = 110699 if `y' == "calusanfrancisco"|`y'=="gyucaliforniaorniasanfrancisco"|`y'=="ucaliforniaorniacenterhealthscila"|`y'=="ucaliforniaorniamedcentsanfran"|`y'=="ucsf"
replace `x' = 173258 if `y' == "carletonu"
replace `x' = 211440 if `y' == "carnegiemellonitech"|`y'=="ucarnegiemellonu"
replace `x' = 201645 if `y' == "casewestrnreserve"
replace `x' = 144050 if `y' == "chicagouof"|`y'=="uchicagopritzkerschmed"
replace `x' = 153603 if `y' == "chiowastu"
replace `x' = 206260 if `y' == "cincinnatiuof"|`y'=="ucincinnati"|`y'=="ucincinnaticollegemed"
replace `x' = 112251 if `y' == "claremontgraduatesch"|`y'=="ranchosantaanabotanicgardenclaremontgraduateu"
replace `x' = 436377 if `y' == "coldspringharborlab"
replace `x' = 231624 if `y' == "collegewilliammaryvirginiaimarinescience"|`y'=="williammaryc"
replace `x' = 126614 if `y' == "coloradouboulder"|`y'=="ucolorado"|`y'=="uucolorado"
replace `x' = 126562 if `y' == "coloudenvercenter"|`y'=="ucoloradodenverhealthsciencecenter"
replace `x' = 190150 if `y' == "columbaphyssurg"|`y'=="columbiabarnard"|`y'=="columbiau"|`y'=="columbiaucollegephysurg"|`y'=="columbiaucolphyssurgeons"|`y'=="columbiaucphys"|`y'=="columbiaulamontdohertyearthobservatory"|`y'=="columbiaupresbymedcenter"|`y'=="columbiauteacherscollege"|`y'=="teacherscollegecolumbiau"
replace `x' = 129020 if `y' == "uconnecticutmarinesciencei"|`y'=="connecticutuof"|`y'=="connuhealthcenter"|`y'=="uconnecticutaverypoint"|`y'=="uconnecticuthealthcenter"|`y'=="uconnecticutstorrs"
replace `x' = 190406 if `y' == "cornell"|`y'=="ecornellu"|`y'=="gycornellu"
replace `x' = 190424 if `y' == "weillcornellmedsch"|`y'=="weillgraduatecollegemedsciencecornellu"|`y'=="weillgraduateschmedsciencecornellu"|`y'=="cornelluweillcornellmedcollege"|`y'=="cornellweillgraduateschmedscience"|`y'=="joansanfordiweillmedcollegecornellu"
replace `x' = 190433 if `y' == "cornelluendowed"|`y'=="cornellumedcampus"|`y'=="cornellumedcol"|`y'=="cornellust"
replace `x' = 193900 if `y' == "courantimathscience"
replace `x' = 182670 if `y' == "dartmouthcollegethayerschengineering"|`y'=="dartmouthmedsch"
replace `x' = 130943 if `y' == "delawareuof"|`y'=="udelawarecollegemarinestudies"
replace `x' = 198419 if `y' == "dukeumedcenter"|`y'=="dukeuschmed"
replace `x' = 166027 if `y' == "harvardcollegepresidentfellowsof"|`y'=="eharvardu"|`y'=="gyharvardu"|`y'=="gyharvardumedsch"|`y'=="harvardmassachusettsitech"|`y'=="harvardmedsch"|`y'=="harvardmitdivhealthsci"|`y'=="harvardmitdivisionhealthsciencetech"|`y'=="harvardmitprog"|`y'=="harvardmitproghealthsci"|`y'=="harvardumedsch"|`y'=="harvarduschpublichealth"|`y'=="harvarduunk"|`y'=="lharvardu"|`y'=="paharvardu"|`y'=="sharvardu"
replace `x' = 139658 if `y' == "emoryuyerkesregionalprimateresearchcenter"
replace `x' = 240444 if `y' == "euwisconsinmadison"
replace `x' = 221999 if `y' == "evanderbiltu"|`y'=="vanderbiltuschmed"|`y'=="vanderbiltumedcenter"
replace `x' = 133650 if `y' == "floridaagmechu"
replace `x' = 134130 if `y' == "floridauof"|`y'=="gyuflorida"|`y'=="ufloridaifoodagric"
replace `x' = 139755 if `y' == "geogiaitech"|`y'=="georgiaiitutetech"|`y'=="georgiaitech"|`y'=="georgiaitechgeorgiatech"|`y'=="georgiatechresearchcorporation"|`y'=="georgiatechresearchcorporationgeorgiaitech"|`y'=="gygeorgiaitech"|`y'=="jointgeorgiaitech"|`y'=="medcollegegeorgiaresearchiinc"|`y'=="thegeorgiaitech"|`y'=="ugeorgiaitech"
replace `x' = 139959 if `y' == "georgiauof"
replace `x' = 131469 if `y' == "geowashingtonu"|`y'=="georgewashingtonuthe"
replace `x' = 166683 if `y' == "gymassachusettsitech"|`y'=="lmassachusettsitech"|`y'=="massachussettsitech"|`y'=="massitech"|`y'=="mit"|`y'=="omassachusettsitech"|`y'=="umassachusettsitech"
replace `x' = 186131 if `y' == "gyprincetonu"
replace `x' = 243744 if `y' == "ystanfordu"|`y'=="gystanfordu"|`y'=="lstanfordu"|`y'=="ostanfordu"|`y'=="stanfordfdresi"|`y'=="stanfordschmed"|`y'=="stanforduhopkinsmarinestation"|`y'=="stanforduscheng"|`y'=="stanforduschmed"
replace `x' = 104179 if `y' == "gyuarizona"
replace `x' = 174066 if `y' == "gyuminnesotaminneapolis"|`y'=="minnesotauminneapl"|`y'=="uminnesota"|`y'=="uminnesotaitech"|`y'=="uminnesotaminneapolis"
replace `x' = 174066 if `y' == "uminnesotatwincit"
replace `x' = 215062 if `y' == "gyupennsylvania"|`y'=="pennsylvaniauof"
replace `x' = 236948 if `y' == "gyuwashington"|`y'=="uwgraduatesch"
replace `x' = 130794 if `y' == "gyyaleu"|`y'=="yaleuschforestryenvironmentalstudies"|`y'=="yaleuschmed"|`y'=="yaleuyaleschmed"
replace `x' = 141963 if `y' == "hawaiiuof"|`y'=="hawaiiusystemoff"|`y'=="uhawaii"|`y'=="uhawaiihonolulucc"
replace `x' = 225511 if `y' == "houstonuof"
replace `x' = 115755 if `y' == "humboldtstufoundation"
replace `x' = 142285 if `y' == "idahouof"
replace `x' = 145813 if `y' == "illinoisstunormal"|`y'=="illinoisuunknown"
replace `x' = 145637 if `y' == "illuurbanachamp"|`y'=="luillinoisurbanachampaign"|`y'=="uillinoisurbanachampaign"
replace `x' = 151351 if `y' == "indianau"|`y'=="indianaubloomngton"
replace `x' = 151102 if `y' == "indianaupurduefortwayne"
replace `x' = 151111 if `y' == "indianauschmed"
replace `x' = 153658 if `y' == "iowauof"|`y'=="uiowacollegemed"
replace `x' = 162928 if `y' == "johnshopkinsbloombergschpublichealth"|`y'=="jhopkinsmedi"|`y'=="johnshopkinsmedi"|`y'=="johnshopkinsschmed"|`y'=="johnshopkinsukriegerschartsscience"|`y'=="johnshopkinsumedi"|`y'=="johnshopkinsupeabodyi"|`y'=="johnshopkinsuschhygienepublichealth"|`y'=="johnshopkinsuschmed"
replace `x' = 155256 if `y' == "kansasuof"|`y'=="ukansasmaincampus"
replace `x' = 203517 if `y' == "kentstu"
replace `x' = 157085 if `y' == "kentuckyuof"|`y'=="ukentuckylexington"|`y'=="ukentuckyresearchfoundation"
replace `x' = 165334 if `y' == "lclarku"
replace `x' = 146676 if `y' == "lincolnc"
replace `x' = 159391 if `y' == "louisianastuamcollege"
replace `x' = 159373 if `y' == "louisianastuschmedneworl"
replace `x' = 435000 if `y' == "louisianastuschmedshrevep"
replace `x' = 159647 if `y' == "louisianatechunversity"
replace `x' = 157289 if `y' == "louisvilleuof"|`y'=="ulouisvilleresearchfoundationinc"|`y'=="ulouisvilleschmed"
replace `x' = 141574 if `y' == "hawaiiimarinebiologyuhawaiimanoa"
replace `x' = 435338 if `y' == "lscrippsresearchi"
replace `x' = 170976 if `y' == "lumichigan"|`y'=="michiganuof"|`y'=="umichigan"|`y'=="umichiganmedsch"
replace `x' = 161253 if `y' == "maineuorono"|`y'=="umaineorono"
replace `x' = 163268 if `y' == "marylandubaltcnty"
replace `x' = 163286 if `y' == "marylanduof"|`y'=="umaryland"
replace `x' = 232681 if `y' == "marywashingtoncollege"
replace `x' = 166629 if `y' == "massuofamherst"|`y'=="umassachusettsamherst"|`y'=="umassachusettsmedsch"
replace `x' = 167987 if `y' == "umassachusettsdartmouth"
replace `x' = 166638 if `y' == "umassachusettsharborcampus"
replace `x' = 166513 if `y' == "umassachusettslowell"
replace `x' = 166708 if `y' == "umassachusettsworcester"
replace `x' = 173948 if `y' == "mayoclinicrochester"|`y'=="mayograduatesch"
replace `x' = 204024 if `y' == "miamiu"|`y'=="miamiuoxfordcampus"
replace `x' = 135726 if `y' == "miamiuof"|`y'=="umiamirosenstielschmarineatmosphericsci"|`y'=="umiamischmed"
replace `x' = 171128 if `y' == "michigantechu"
replace `x' = 171137 if `y' == "michiganudearborn"
replace `x' = 180489 if `y' == "montanauof"|`y'=="umontana"|`y'=="umontanawestern"
replace `x' = 199102 if `y' == "ncagtechstu"|`y'=="northcarolinaagtechnicalstu"
replace `x' = 199193 if `y' == "ncsturaleigh"|`y'=="northcarolinastu"
replace `x' = 199120 if `y' == "ncuofchapelhill"|`y'=="unorthcarolina"|`y'=="theunorthcarolinachapelhill"
replace `x' = 199218 if `y' == "ncuofwilmington"
replace `x' = 181464 if `y' == "nebraskaulincoln"
replace `x' = 181428 if `y' == "nebraskaumedcenter"
replace `x' = 182290 if `y' == "nevadauof"|`y'=="unevada"|`y'=="unevadarenofleischmannplanetarium"
replace `x' = 183044 if `y' == "newhampshireuof"|`y'=="unewhampshire"|`y'=="unewhampshiredurham"
replace `x' = 188012 if `y' == "newmexicostu"
replace `x' = 187985 if `y' == "newmexicouof"|`y'=="unewmexico"
replace `x' = 193654 if `y' == "newschforsocialresearch"|`y'=="newschu"
replace `x' = 194152 if `y' == "newyorkstcollegeceramicsalfredu"
replace `x' = 193900 if `y' == "newyorkucourantimathematicalscience"|`y'=="newyorkumedcenter"|`y'=="newyorkuschmed"
replace `x' = 187967 if `y' == "nmeximiningtech"
replace `x' = 200332 if `y' == "northdakotastu"|`y'=="northdakotastufargo"
replace `x' = 147749 if `y' == "northwesternuchicagocampus"|`y'=="northwesternuchicago"
replace `x' = 152080 if `y' == "notredameuof"
replace `x' = 136215 if `y' == "novaseuoceancenter"
replace `x' = 204796 if `y' == "ohiostu"|`y'=="ohiosturesearchfoundation"|`y'=="theohiostu"
replace `x' = 204857 if `y' == "ohiou"
replace `x' = 207388 if `y' == "oklahomastu"
replace `x' = 207430 if `y' == "oklahomauof"|`y'=="oklauhealthsci"|`y'=="uoklahoma"
replace `x' = 209551 if `y' == "oregonuof"|`y'=="uoregoneugene"
replace `x' = 228778 if `y' == "outexasaustin"|`y'=="texasuaustin"|`y'=="utexasaustin"
replace `x' = 214777 if `y' == "pennsylvaniastu"|`y'=="thepennsylvaniastu"|`y'=="pennsylvaniastuupark"|`y'=="pennstcollegemed"|`y'=="pennstuupark"|`y'=="pennstuhershymed"|`y'=="pennstuhersheymedcntr"|`y'=="pennstugreatvalley"|`y'=="pennstucapitalcollege"|`y'=="pennstu"|`y'=="pennsylvaniastucommonwealthcollege"|`y'=="schreyerhonorscollegepennsylvaniastu"
replace `x' = 214227 if `y' == "pennsylvaniastberkslehighvalleycollege"
replace `x' = 215293 if `y' == "pittsburghuof"|`y'=="upittsburgh"
replace `x' = 194541 if `y' == "polyunewyork"
replace `x' = 152266 if `y' == "purdueunorthcenteral"
replace `x' = 243780 if `y' == "purdueuscheng"
replace `x' = 217484 if `y' == "rhodeislanduof"|`y'=="urhodeislandschoceanograph"
replace `x' = 195030 if `y' == "rochesteruof"|`y'=="urochestermedsch"
replace `x' = 186371 if `y' == "rutgersstunewark"|`y'=="rutgersstunewbrunswick"|`y'=="rutgersu"|`y'=="rutgersunewark"|`y'=="rutgersunewbrunswick"
replace `x' = 122436 if `y' == "sandiegouof"
replace `x' = 122409 if `y' == "sandiegostufoundation"|`y'=="sandiegostuucaliforniaorniadavis"
replace `x' = 435338 if `y' == "lscrippsresearchi"|`y'=="thescrippsresearchi"|`y'=="scrippsresi"|`y'=="thescrippsresearchiflorida"|`y'=="thescrippsresearchikelloggschsciencetech"
replace `x' = 219347 if `y' == "sdschminestech"
replace `x' = 218663 if `y' == "southcarolinauof"|`y'=="usouthcarolina"|`y'=="usouthcarolinaschmed"
replace `x' = 123961 if `y' == "southerncaliforniauof"|`y'=="usoutherncallosangeles"
replace `x' = 149222 if `y' == "southernillinoisu"
replace `x' = 228246 if `y' == "sthrnmethodistu"
replace `x' = 179159 if `y' == "stlouisu"
replace `x' = 196255 if `y' == "stunewyork"|`y'=="theuthestnewyork"
replace `x' = 196130 if `y' == "sunybuffalo"
replace `x' = 196282 if `y' == "stunystonybrook"|`y'=="sunystbrkhthsci"|`y'=="sunystonybrook"
replace `x' = 196103 if `y' == "sunyenvironscienceforestry"
replace `x' = 196307 if `y' == "sunyupstmedu"
replace `x' = 221759 if `y' == "tennuknoxville"
replace `x' = 228723 if `y' == "texasamumaincampus"|`y'=="ustexasamu"
replace `x' = 229090 if `y' == "texasudallas"|`y'=="texasuelpaso"
replace `x' = 195049 if `y' == "therockefelleru"
replace `x' = 199139 if `y' == "theunorthcarolinacharlotte"
replace `x' = 221759 if `y' == "theutennesseememphishealthsciencecenter"
replace `x' = 216366 if `y' == "thomasjeffrsonu"
replace `x' = 168148 if `y' == "tuftsuschmed"
replace `x' = 160755 if `y' == "tulaneu"|`y'=="tulaneula"|`y'=="tulaneuschmed"
replace `x' = 200800 if `y' == "uakron"
replace `x' = 100733 if `y' == "ualabama"|`y'=="ualabamatuscaloosa"
replace `x' = 100706 if `y' == "ualabamahuntsville"|`y'=="theualabamainhuntsville"
replace `x' = 102614 if `y' == "ualaskafairbankscampus"|`y'=="ualaskafairbanksjuneaucampus"
replace `x' = 106397 if `y' == "uarkansasfayetteville"|`y'=="uarkansasmedscience"|`y'=="uarkansasmedsciencecampus"
replace `x' = 445188 if `y' == "ucmerced"
replace `x' = 145600 if `y' == "uillinois"
replace `x' = 176017 if `y' == "umississippi"
replace `x' = 181428 if `y' == "unebraskaomaha"
replace `x' = 200280 if `y' == "unorthdakotamaincampus"
replace `x' = 215062 if `y' == "upaschmed"|`y'=="upennsylvaniaschmed"|`y'=="upennsylvaniawhartonschfinancecom"
replace `x' = 251154 if `y' == "upuertoricomayaguez"|`y'=="prumayaguez"|`y'=="upuertoricoriopiedras"|`y'=="upuertoricomedsci"
replace `x' = 181002 if `y' == "uscreightonu"
replace `x' = 448840 if `y' == "usouthfloridastpetersburg"
replace `x' = 230764 if `y' == "utahuof"
replace `x' = 221759 if `y' == "utennesseehealthsciencecenter"|`y'=="utennesseeknoxville"|`y'=="utennesseespacei"
replace `x' = 229090 if `y' == "utexasarlington"|`y'=="utexasdallas"|`y'=="utexaselpaso"|`y'=="utexasmedbrchgalveston"
replace `x' = 416801 if `y' == "utexascancercenterhouston"|`y'=="utexashealthsciencecenterhouston"|`y'=="utexashealthscihouston"|`y'=="utexashealthsciencecentersanantonio"|`y'=="utexashealthscisanantonio"|`y'=="utexassanantonio"|`y'=="utexassouthwesternmedcenter"|`y'=="utexassouthwesternmedcenterdallas"|`y'=="utsouthwesternmedcenter"
replace `x' = 231174 if `y' == "uvermontburlington"|`y'=="uvermontstagcollege"|`y'=="vermontuof"
replace `x' = 234076 if `y' == "uvirginia"|`y'=="uvirginiacharlottesville"|`y'=="virginiauof"
replace `x' = 233921 if `y' == "uvirginiapolyistu"|`y'=="vapolyistu"|`y'=="vapolyitateu"|`y'=="virginiatech"|`y'=="virginiapolyitateun"
replace `x' = 236948 if `y' == "uwashington"|`y'=="uwashingtonschmed"|`y'=="washingtonuof"
replace `x' = 240055 if `y' == "uwisconsin"|`y'=="uwisconsinschmed"|`y'=="wisconsinumadison"
replace `x' = 234030 if `y' == "virginiacommonwealthumedcoll"
replace `x' = 199847 if `y' == "wakeforestubowmangrayschmed"
replace `x' = 236939 if `y' == "washingtonstuspokane"|`y'=="washingtonstuvancouver"
replace `x' = 179867 if `y' == "washingtonuinstlouisschmed"|`y'=="washingtonu"|`y'=="washingtonuschmed"
replace `x' = 238032 if `y' == "westvirginiauresearchcorporation"
replace `x' = 227757 if `y' == "williammarshriceu"
replace `x' = 166610 if `y' == "woodsholeoceanographic"|`y'=="woodsholeoceanographici"|`y'=="woodsholeoceanographici+mitjointprogram"|`y'=="woodsholeoceanographicibiologicaloceanography"|`y'=="woodsholeocni"
replace `x' = 206598 if `y' == "wrightstu"
replace `x' = 240727 if `y' == "wyominguof"
replace `x' = 197708 if `y' == "yeshivauaeieincollegemed"
replace `x' = 219471 if `y' == "usouthdakotamaincampus"
replace `x' = 194310 if `y' == "paceu"
replace `x' = 213020 if `y' == "indianaupennsylvania"
replace `x' = 206589 if `y' == "collegewooster"
replace `x' = 110422 if `y' == "californiaorniapolystu"
replace `x' = 206084 if `y' == "utoledo"
replace `x' = 121628 if `y' == "pardeerandgraduatesch"
** additional matches for Current Institution
replace `x' = 133881 if `y' == "floridaitech"
replace `x' = 180461 if `y' == "montanastu"
replace `x' = 106397 if `y' == "uarkansas"
replace `x' = 126614 if `y' == "ucoloradoboulder"
replace `x' = 126562 if `y' == "ucoloradodenver"
replace `x' = 155317 if `y' == "ukansasmaincampus"
replace `x' = 196088 if `y' == "sunybuffalo"
}
}
* Build the final current-institution IPEDS id: begin with ipeds_cur_id, then
* let any non-missing hand match (ipeds_handmatch_cur) take its place.
generate double ipeds_cur = ipeds_cur_id
* dup_iped = 1 when the hand match merely repeats the id already held in ipeds_cur
generate dup_iped = 1 if ipeds_cur != . & ipeds_handmatch_cur == ipeds_cur
replace ipeds_cur = ipeds_handmatch_cur if dup_iped != 1 & ipeds_handmatch_cur != .
label variable ipeds_cur "Cur IPEDS ID, final match for grfp_cur"
* Diagnostics: any id assigned / strictly positive id / ids that came from the hand match only
count if ipeds_cur != .
count if ipeds_cur > 0 & ipeds_cur != .
count if dup_iped != 1 & ipeds_handmatch_cur != .

********************************************************************************
** 30491 possible Current Institutions
** 30169 have some ipeds id (this includes -99, -77, -55): 98.94% Match Rate
** 29854 have ipeds id: 97.91% Match Rate
** 21911 were exact matches (1st round)
** 8258 were string matches (2nd round)
********************************************************************************

* Persist the GRFP list carrying both the proposed (ipeds) and the current
* (ipeds_cur) institution ids.
save "$dir/GRFP/GRFP list with uni & cur IPEDS.dta",replace

********************************************************************************
* Reload the saved file and check how often the proposed and current
* institution ids agree. clear all already drops matrices, so no separate
* clear matrix is needed.
clear all 
use "$dir/GRFP/GRFP list with uni & cur IPEDS.dta"
* Stata orders missing above every number, so "> 0" alone keeps rows where an
* id is missing. Requiring both ids to be non-missing makes summarize use the
* same rows as correlate, which already drops incomplete pairs on its own
* (the recorded correlation is therefore unaffected). "> 0" still excludes
* the negative codes.
sum ipeds_cur ipeds if ipeds_cur > 0 & ipeds > 0 & !missing(ipeds_cur, ipeds)
corr ipeds_cur ipeds if ipeds_cur > 0 & ipeds > 0 & !missing(ipeds_cur, ipeds)
** 0.8337 correlation
* no_change = 1 when both ids are the same valid (positive, non-missing) IPEDS id
gen no_change = 1 if ipeds_cur == ipeds & ipeds!=. & ipeds>0
corr ipeds_cur ipeds if no_change == 1
lab var no_change "Binary: Proposed Institution IPEDS = Current Institution IPEDS"
* Agreement counts for award years 2005-2008, overall and split by offered_award
count if no_change == 1 & grfp_year > 2004 & grfp_year < 2009
count if grfp_year > 2004 & grfp_year < 2009
bysort offered_award: count if no_change == 1 & grfp_year > 2004 & grfp_year < 2009
bysort offered_award: count if grfp_year > 2004 & grfp_year < 2009
/* 
	7171 list the same institution over this period out of 10565 (67.87 percent)
	4661: honorable mentions (6796 total, 68.58%)
	2510: awardees (3766 total, 66.64%)
	These results are useful for us, noting that mis-reporting is consistent across both types of awards.
*/
save "$dir/GRFP/GRFP list with uni & cur IPEDS final.dta",replace	
********************************************************************************
}
***** STEP 4: NRC: Clean up university string
{
/* NRC Data Set: 221 unique university observations in NRC survey: 
this represents 212 universities. there were 9 combinations of multiple universities.
data pulled from http://www.nap.edu/rdp/
*/

* Import the NRC spreadsheet (sheet Master, first row holds variable names),
* convert numeric-looking string columns, and store an untouched Stata copy
* before any cleaning happens.
set more off
clear all
import excel using "$dir/ResDocTableMac8_4-29-11.xls", sheet("Master") cellrange(A1:BT5005) firstrow clear
destring, replace
save "$dir/NRC 05 Survey/NRC survey original.dta", replace
** 5004 unique observations
* Continue from the saved copy
clear all
use "$dir/NRC 05 Survey/NRC survey original.dta"
********************************************************************************
** Clean up variable names **

* Give the three identifying NRC columns their working names and labels.
rename (InstitutionName BroadField Field) (nrc_uni nrc_broad_department nrc_department)
label variable nrc_uni "NRC Institution"
label variable nrc_broad_department "Broad Field (NRC)"
label variable nrc_department "Department (NRC)"

** Clean up institution names: lower case, collapse repeated internal blanks,
** and trim leading/trailing blanks
** Create list of institutions 
gen nrc_uni_clean_full = trim(itrim(lower(nrc_uni)))

** Clean up name of institution
* The substitutions below run strictly in the order written; several rely on
* an earlier rule having fired (or not fired) first, so do not reorder them.
* NOTE(review): the cleaned string becomes the merge key uni_clns_to_match
* used against the IPEDS list in STEP 5, so these rules presumably have to
* stay identical to the IPEDS cleaning in STEP 2 -- confirm before editing.
gen nrc_uni_to_clean = nrc_uni_clean_full
foreach x in nrc_uni_to_clean {
set more off
* connecting words and abbreviations that are delimited by blanks
replace `x' = subinstr(`x', " of ", "", .)
replace `x' = subinstr(`x', " at ", "", .)
replace `x' = subinstr(`x', "coll ", "college", .)
replace `x' = subinstr(`x', "col ", "college", .)
replace `x' = subinstr(`x', " sci ", "science", .)
replace `x' = subinstr(`x', " the ", "", .)
replace `x' = subinstr(`x', " and ", "", .)
replace `x' = subinstr(`x', "&", "", .)
* university / institute variants collapse to a single letter. These are plain
* substring matches, so they also fire inside longer words (the hand-match
* keys show community ending up as commuty).
replace `x' = subinstr(`x', "university", "u", .)
replace `x' = subinstr(`x', "univ", "u", .)
replace `x' = subinstr(`x', "uni", "u", .)
replace `x' = subinstr(`x', "institute", "i", .)
replace `x' = subinstr(`x', "institution", "i", .)
replace `x' = subinstr(`x', "insts", "i", .)
replace `x' = subinstr(`x', "inst", "i", .)
replace `x' = subinstr(`x', " ins", "i", .)
replace `x' = subinstr(`x', "technology", "tech", .)
* punctuation
replace `x' = subinstr(`x', "-", "", .)
replace `x' = subinstr(`x', ",", "", .)
replace `x' = subinstr(`x', "/", "", .)
replace `x' = subinstr(`x', "(", "", .)
replace `x' = subinstr(`x', ")", "", .)
replace `x' = subinstr(`x', ".", "", .)
* common word abbreviations
replace `x' = subinstr(`x', "state", "st", .)
replace `x' = subinstr(`x', " s ", "st", .)
replace `x' = subinstr(`x', "school", "sch", .)
replace `x' = subinstr(`x', "schl", "sch", .)
replace `x' = subinstr(`x', "centr", "center", .)
replace `x' = subinstr(`x', "ctr", "center", .)
* calif also matches inside california, which is why hand-match keys contain
* californiaornia
replace `x' = subinstr(`x', "calif", "california", .)
replace `x' = subinstr(`x', "rsrve", "reserve", .)
replace `x' = subinstr(`x', "medical", "med", .)
replace `x' = subinstr(`x', "medicine", "med", .)
replace `x' = subinstr(`x', "ga ", "georgia", .)
replace `x' = subinstr(`x', "hlth", "health", .)
replace `x' = subinstr(`x', "laboratory", "lab", .)
replace `x' = subinstr(`x', "labs", "lab", .)
replace `x' = subinstr(`x', "physicians", "phys", .)
replace `x' = subinstr(`x', "physician", "phys", .)
replace `x' = subinstr(`x', "agriculture", "ag", .)
replace `x' = subinstr(`x', "agricultural", "ag", .)
* second pass for sci: removing periods above can expose new blank-delimited cases
replace `x' = subinstr(`x', " sci ", "science", .)
replace `x' = subinstr(`x', "sciences", "science", .)
replace `x' = subinstr(`x', "int'l", "international", .)
replace `x' = subinstr(`x', "intl", "international", .)
replace `x' = subinstr(`x', " engineering ", "eng", .)
replace `x' = subinstr(`x', " cnty ", "county", .)
replace `x' = subinstr(`x', " okla ", "oklahoma", .)
replace `x' = subinstr(`x', " va ", "virginia", .)
replace `x' = subinstr(`x', " ill ", "illinois", .)
replace `x' = subinstr(`x', "@", "", .)
replace `x' = subinstr(`x', "canada", "", .)
replace `x' = subinstr(`x', "polytechnic", "poly", .)
replace `x' = subinstr(`x', "polytech", "poly", .)
replace `x' = subinstr(`x', "saint", "st", .)
* remove spaces
gen `x'_nospace = subinstr(`x', " ","", .)
* stable keeps rows that share a name in their incoming order; a plain sort
* would order such ties randomly
sort `x'_nospace, stable
egen nrc_group_clean = group(`x')
egen nrc_group_clns = group(`x'_nospace)
}
rename nrc_uni_to_clean nrc_uni_cleaned
rename nrc_uni_to_clean_nospace nrc_uni_clns
egen nrc_group_original = group(nrc_uni_clean_full)
lab var nrc_group_original "Unique uni group ID, based on full nrc name"
lab var nrc_group_clean "Unique uni group ID, based on clean nrc name"
lab var nrc_group_clns "Unique uni group ID, based on clean nrc name with no spaces"
br nrc_uni_clean_full nrc_uni_cl*
sum nrc_group*
** 221 original groups, 221 cleaned groups, 221 cleaned & nospace **
* Within-university row counter and row total. The sort is stable so that
* nrc_clns_counter is attached to the same rows on every run; without it the
* order inside each university, and hence the counter, changes between runs.
sort nrc_group_clns, stable
by nrc_group_clns: gen nrc_clns_counter = _n
by nrc_group_clns: gen nrc_clns_tally = _N
lab var nrc_clns_tally "number of obs within university, derived from nrc clean no space data"
gen uni_clns_to_match = nrc_uni_clns
** uni_clns_to_match: MATCH WITH IPEDS **
********************************************************************************
save "$dir/NRC 05 Survey/NRC list clean round1.dta",replace
}
***** STEP 5: Merge IPEDS to NRC Institution
{
* Load the cleaned NRC list and attach IPEDS ids by the cleaned, space-free
* institution name.
clear all 
use "$dir/NRC 05 Survey/NRC list clean round1.dta"
sort uni_clns_to_match
merge m:1 uni_clns_to_match using "$dir/IPEDS/IPEDS list clean round2.dta"
** University-Program level match procedure ** matched 3918 NRC departments; need to match 1086 remaining projects
* Discard IPEDS-only rows (institutions that do not occur in the NRC data)
keep if _merge != 2
**Matching procedure in DTA file: ipeds # (if match); -99 = foreign; -77 unable to identify institution; -55 unknown
* Holder for the manually assigned ids that the following sections fill in
generate ipeds_handmatch = .
********************************************************************************
** REPLACE -77 FOR UNIVERSITIES WITH NO IPEDS: 1 observations
********************************************************************************
* Every cleaned name listed below is coded -77 (unable to identify institution).
* NOTE(review): missouriusciencetech is given IPEDS 178411 by the hand-match
* list that follows this section, so the -77 set here is overwritten for it.
foreach nm in ///
	alabamaubirmingham aberystwythu americanmuseumnaturalhistory ///
	architeccturalassociation binghamtonu collegeforindustrialengrsiqs ///
	costaricauof factoryphysicsinc hawaiieastwestcenter ///
	meddentnjrwjohnsonpsctaway ///
	missouriusciencetech montereyiforinternationalstd oregongraduatei ///
	oregongraduateiciencetech oxfordpoly proposedgraduatei ///
	schadvancedinternationalstudies soclstudiesi southwesttexasstu ///
	texuhealthscihoust texumedbrgalvstn texuswstrnmedcenter ///
	umeddentnjpiscataway usouthwesternlouisiana virginiaimarinescience ///
	virginiatechassociates westvirginiaitech ///
	lelandstanfordjru newyorkbotanicalgarden richardgildergraduatesch ///
	memorialsloanketteringcancercenter gerstnersloanketteringgraduatesch ///
	gerstnersloanketteringgraduateschbiomedscience ///
	gerstnersloanketteringschbiomedscience sloanketteringiforcancerresearch ///
	georgiacollege uhawaiikapiolanicommutycollege {
	replace ipeds_handmatch = -77 if uni_clns_to_match == "`nm'"
}
* Kept as a direct statement because the name contains an apostrophe
replace ipeds_handmatch = -77 if uni_clns_to_match == "simon'srockbardcollege"
********************************************************************************
** REPLACE MISSING WITH IPEDS: 985 observations
********************************************************************************
foreach x in ipeds_handmatch {
foreach y in uni_clns_to_match {
set more off
replace `x' = 178402 if `y' == "umissourikansascity"
replace `x' = 178411 if `y' == "umissourirolla"|`y' == "missouriusciencetech"|`y' == "missouriurolla"
replace `x' = 178420 if `y' == "umissouristlouis"
replace `x' = 178396 if `y' == "missouriucolumbia"
replace `x' = 103112 if `y' == "alaskauof"|`y'=="ualaska"|`y'=="ualaskaanchorage"
replace `x' = 210562 if `y' == "alberteieincollegemed"
replace `x' = 201007 if `y' == "antiochnewenglandgradsch"
replace `x' = 104151 if `y' == "arizonastuphoenix"|`y'=="larizonastu"
replace `x' = 104179 if `y' == "arizonauof"|`y'=="uarizonadeptagbiosystemsengineering"
replace `x' = 106458 if `y' == "arkansasstu"
replace `x' = 100858 if `y' == "auburnu"
replace `x' = 164988 if `y' == "bostonugraduatesch"|`y'=="bostonucharlesrivercampus"|`y'=="bostonueristy"|`y'=="bostonugraduateschartsscience"|`y'=="bostonuschmed"|`y'=="trusteesbostonu"
replace `x' = 201441 if `y' == "bowlinggreenstu"
replace `x' = 190567 if `y' == "brooklyncollege"|`y'=="cityunewyork"|`y'=="cunygraduateschucenter"|`y'=="cunyhuntercollege"|`y'=="cunymtsinaischmed"|`y'=="cunyqueenscollege"|`y'=="huntercollegecuny"|`y'=="thegraduatecentercuny"
replace `x' = 110714 if `y' == "californiacruzlick"
replace `x' = 110422 if `y' == "californiaorniapolystufoundation"|`y'=="californiaorniapolystupomona"|`y'=="californiaorniapolystusanluisobi"
replace `x' = 112765 if `y' == "californiaorniastufullertonfoundation"|`y'=="californiaorniastuhayward"
replace `x' = 110635 if `y' == "ucberkeleyucsfjointbioengineeringprogram"|`y'=="ucberkeleyucsanfranciscojointbioengineeringprogram"|`y'=="ucberkeley"|`y'=="ucb"|`y'=="californiauberkeley"|`y'=="gyucaliforniaorniaberkeley"|`y'=="theucaliforniaorniaberkeley"|`y'=="ucaliforniaornia"|`y'=="ucaliforniaorniaberkeleyucsanfranciscojointpr"
replace `x' = 110644 if `y' == "californiaudavis"|`y'=="eucaliforniaorniadavis"|`y'=="theucaliforniaorniadavis"
replace `x' = 110653 if `y' == "californiauirvine"
replace `x' = 110662 if `y' == "californiaulosangeles"|`y'=="ucaliforniaorniaa"|`y'=="uclahighereducationresearchi"
replace `x' = 110671 if `y' == "californiauriverside"|`y'=="ucaliforniaorniariversidefoundation"
replace `x' = 110680 if `y' == "californiausandiego"|`y'=="gyucaliforniaorniasandiego"|`y'=="oucaliforniaorniasandiego"|`y'=="scrippsioceanographyucaliforniaorniasandiego"|`y'=="scrippsioceanographyucsandiego"|`y'=="scrippsioceanographyucsd"|`y'=="scrippsioceanography"|`y'=="theucaliforniaorniasandiego"|`y'=="ucaliforniaorniasandiegorevellecollege"|`y'=="ucaliforniaorniasandiegoschmed"|`y'=="ucaliforniaorniasandiegoscrippsiocean"|`y'=="ucaliforniaorniasandiegoscrippsioceanography"|`y'=="ucaliforniaorniasandiegosio"
replace `x' = 110705 if `y' == "californiausantabarb"|`y'=="ucaliforniaorniasantabarb"
replace `x' = 110714 if `y' == "californiausantacruz"|`y'=="theucaliforniaorniasantacruz"
replace `x' = 110404 if `y' == "calitech"|`y'=="ecaliforniaorniaitech"|`y'=="gycaliforniaorniaitech"
replace `x' = 110699 if `y' == "calusanfrancisco"|`y'=="gyucaliforniaorniasanfrancisco"|`y'=="ucaliforniaorniacenterhealthscila"|`y'=="ucaliforniaorniamedcentsanfran"|`y'=="ucsf"
replace `x' = 173258 if `y' == "carletonu"
replace `x' = 211440 if `y' == "carnegiemellonitech"|`y'=="ucarnegiemellonu"
replace `x' = 201645 if `y' == "casewestrnreserve"
replace `x' = 144050 if `y' == "chicagouof"|`y'=="uchicagopritzkerschmed"
replace `x' = 153603 if `y' == "chiowastu"
replace `x' = 206260 if `y' == "cincinnatiuof"|`y'=="ucincinnati"|`y'=="ucincinnaticollegemed"
replace `x' = 112251 if `y' == "claremontgraduatesch"|`y'=="ranchosantaanabotanicgardenclaremontgraduateu"
replace `x' = 436377 if `y' == "coldspringharborlab"
replace `x' = 231624 if `y' == "collegewilliammaryvirginiaimarinescience"|`y'=="williammaryc"
replace `x' = 126614 if `y' == "coloradouboulder"|`y'=="ucolorado"|`y'=="uucolorado"
replace `x' = 126562 if `y' == "coloudenvercenter"|`y'=="ucoloradodenverucoloradodenverhealthsciencecenter"
replace `x' = 190150 if `y' == "columbaphyssurg"|`y'=="columbiabarnard"|`y'=="columbiau"|`y'=="columbiaucollegephysurg"|`y'=="columbiaucolphyssurgeons"|`y'=="columbiaucphys"|`y'=="columbiaulamontdohertyearthobservatory"|`y'=="columbiaupresbymedcenter"|`y'=="columbiauteacherscollege"|`y'=="teacherscollegecolumbiau"
replace `x' = 129020 if `y' == "uconnecticutmarinesciencei"|`y'=="connecticutuof"|`y'=="connuhealthcenter"|`y'=="uconnecticutaverypoint"|`y'=="uconnecticuthealthcenter"|`y'=="uconnecticutstorrs"
replace `x' = 190406 if `y' == "cornell"|`y'=="ecornellu"|`y'=="gycornellu"
replace `x' = 190424 if `y' == "weillcornellmedsch"|`y'=="weillgraduatecollegemedsciencecornellu"|`y'=="weillgraduateschmedsciencecornellu"|`y'=="cornelluweillcornellmedcollege"|`y'=="cornellweillgraduateschmedscience"|`y'=="joansanfordiweillmedcollegecornellu"
replace `x' = 190433 if `y' == "cornelluendowed"|`y'=="cornellumedcampus"|`y'=="cornellumedcol"|`y'=="cornellust"
replace `x' = 193900 if `y' == "courantimathscience"
replace `x' = 182670 if `y' == "dartmouthcollegethayerschengineering"|`y'=="dartmouthmedsch"
replace `x' = 130943 if `y' == "delawareuof"|`y'=="udelawarecollegemarinestudies"
replace `x' = 198419 if `y' == "dukeumedcenter"|`y'=="dukeuschmed"
replace `x' = 166027 if `y' == "harvardcollegepresidentfellowsof"|`y'=="eharvardu"|`y'=="gyharvardu"|`y'=="gyharvardumedsch"|`y'=="harvardmassachusettsitech"|`y'=="harvardmedsch"|`y'=="harvardmitdivhealthsci"|`y'=="harvardmitdivisionhealthsciencetech"|`y'=="harvardmitprog"|`y'=="harvardmitproghealthsci"|`y'=="harvardumedsch"|`y'=="harvarduschpublichealth"|`y'=="harvarduunk"|`y'=="lharvardu"|`y'=="paharvardu"|`y'=="sharvardu"
replace `x' = 139658 if `y' == "emoryuyerkesregionalprimateresearchcenter"
replace `x' = 240444 if `y' == "euwisconsinmadison"
replace `x' = 221999 if `y' == "evanderbiltu"|`y'=="vanderbiltuschmed"|`y'=="vanderbiltumedcenter"
replace `x' = 133650 if `y' == "floridaagmechu"
replace `x' = 134130 if `y' == "floridauof"|`y'=="gyuflorida"|`y'=="ufloridaifoodagric"
replace `x' = 139755 if `y' == "geogiaitech"|`y'=="georgiaiitutetech"|`y'=="georgiaitech"|`y'=="georgiaitechgeorgiatech"|`y'=="georgiatechresearchcorporation"|`y'=="georgiatechresearchcorporationgeorgiaitech"|`y'=="gygeorgiaitech"|`y'=="jointgeorgiaitech"|`y'=="medcollegegeorgiaresearchiinc"|`y'=="thegeorgiaitech"|`y'=="ugeorgiaitech"
replace `x' = 139959 if `y' == "georgiauof"
replace `x' = 131469 if `y' == "geowashingtonu"|`y'=="georgewashingtonuthe"
replace `x' = 166683 if `y' == "gymassachusettsitech"|`y'=="lmassachusettsitech"|`y'=="massachussettsitech"|`y'=="massitech"|`y'=="mit"|`y'=="omassachusettsitech"|`y'=="umassachusettsitech"
replace `x' = 186131 if `y' == "gyprincetonu"
replace `x' = 243744 if `y' == "ystanfordu"|`y'=="gystanfordu"|`y'=="lstanfordu"|`y'=="ostanfordu"|`y'=="stanfordfdresi"|`y'=="stanfordschmed"|`y'=="stanforduhopkinsmarinestation"|`y'=="stanforduscheng"|`y'=="stanforduschmed"
replace `x' = 104179 if `y' == "gyuarizona"
replace `x' = 174066 if `y' == "gyuminnesotaminneapolis"|`y'=="minnesotauminneapl"|`y'=="uminnesota"|`y'=="uminnesotaitech"|`y'=="uminnesotaminneapolis"
replace `x' = 174066 if `y' == "uminnesotatwincit"
replace `x' = 215062 if `y' == "gyupennsylvania"|`y'=="pennsylvaniauof"
replace `x' = 236948 if `y' == "gyuwashington"|`y'=="uwgraduatesch"
replace `x' = 130794 if `y' == "gyyaleu"|`y'=="yaleuschforestryenvironmentalstudies"|`y'=="yaleuschmed"|`y'=="yaleuyaleschmed"
replace `x' = 141963 if `y' == "hawaiiuof"|`y'=="hawaiiusystemoff"|`y'=="uhawaii"|`y'=="uhawaiihonolulucc"
replace `x' = 225511 if `y' == "houstonuof"
replace `x' = 115755 if `y' == "humboldtstufoundation"
replace `x' = 142285 if `y' == "idahouof"
replace `x' = 145813 if `y' == "illinoisstunormal"|`y'=="illinoisuunknown"
replace `x' = 145637 if `y' == "illuurbanachamp"|`y'=="luillinoisurbanachampaign"|`y'=="uillinoisurbanachampaign"
replace `x' = 151351 if `y' == "indianau"|`y'=="indianaubloomngton"
replace `x' = 151102 if `y' == "indianaupurduefortwayne"
replace `x' = 151111 if `y' == "indianauschmed"
replace `x' = 153658 if `y' == "iowauof"|`y'=="uiowacollegemed"
replace `x' = 162928 if `y' == "johnshopkinsbloombergschpublichealth"|`y'=="jhopkinsmedi"|`y'=="johnshopkinsmedi"|`y'=="johnshopkinsschmed"|`y'=="johnshopkinsukriegerschartsscience"|`y'=="johnshopkinsumedi"|`y'=="johnshopkinsupeabodyi"|`y'=="johnshopkinsuschhygienepublichealth"|`y'=="johnshopkinsuschmed"
replace `x' = 155256 if `y' == "kansasuof"
replace `x' = 203517 if `y' == "kentstu"
replace `x' = 157085 if `y' == "kentuckyuof"|`y'=="ukentuckylexington"|`y'=="ukentuckyresearchfoundation"
replace `x' = 165334 if `y' == "lclarku"
replace `x' = 146676 if `y' == "lincolnc"
replace `x' = 159391 if `y' == "louisianastuamcollege"
replace `x' = 159373 if `y' == "louisianastuschmedneworl"
replace `x' = 435000 if `y' == "louisianastuschmedshrevep"
replace `x' = 159647 if `y' == "louisianatechunversity"
replace `x' = 157289 if `y' == "louisvilleuof"|`y'=="ulouisvilleresearchfoundationinc"|`y'=="ulouisvilleschmed"
replace `x' = 141574 if `y' == "hawaiiimarinebiologyuhawaiimanoa"
replace `x' = 435338 if `y' == "lscrippsresearchi"
replace `x' = 170976 if `y' == "lumichigan"|`y'=="michiganuof"|`y'=="umichigan"|`y'=="umichiganmedsch"
replace `x' = 161253 if `y' == "maineuorono"|`y'=="umaineorono"
replace `x' = 163268 if `y' == "marylandubaltcnty"
replace `x' = 163286 if `y' == "marylanduof"|`y'=="umaryland"
replace `x' = 232681 if `y' == "marywashingtoncollege"
replace `x' = 166629 if `y' == "massuofamherst"|`y'=="umassachusettsamherst"|`y'=="umassachusettsmedsch"
replace `x' = 167987 if `y' == "umassachusettsdartmouth"
replace `x' = 166638 if `y' == "umassachusettsharborcampus"
replace `x' = 166513 if `y' == "umassachusettslowell"
replace `x' = 166708 if `y' == "umassachusettsworcester"
replace `x' = 173948 if `y' == "mayoclinicrochester"|`y'=="mayograduatesch"
replace `x' = 204024 if `y' == "miamiu"|`y'=="miamiuoxfordcampus"
replace `x' = 135726 if `y' == "miamiuof"|`y'=="umiamirosenstielschmarineatmosphericsci"|`y'=="umiamischmed"
replace `x' = 171128 if `y' == "michigantechu"
replace `x' = 171137 if `y' == "michiganudearborn"
replace `x' = 178396 if `y' == "missouriucolumbia"
replace `x' = 180489 if `y' == "montanauof"|`y'=="umontana"|`y'=="umontanawestern"|`y'=="umontanamissoula"
replace `x' = 199102 if `y' == "ncagtechstu"|`y'=="northcarolinaagtechnicalstu"
replace `x' = 199193 if `y' == "ncsturaleigh"|`y'=="northcarolinastu"
replace `x' = 199120 if `y' == "ncuofchapelhill"|`y'=="unorthcarolina"|`y'=="theunorthcarolinachapelhill"
replace `x' = 199218 if `y' == "ncuofwilmington"
replace `x' = 181464 if `y' == "nebraskaulincoln"
replace `x' = 181428 if `y' == "nebraskaumedcenter"
replace `x' = 182290 if `y' == "nevadauof"|`y'=="unevada"|`y'=="unevadarenofleischmannplanetarium"
replace `x' = 183044 if `y' == "newhampshireuof"|`y'=="unewhampshire"|`y'=="unewhampshiredurham"
replace `x' = 188012 if `y' == "newmexicostu"
replace `x' = 187985 if `y' == "newmexicouof"|`y'=="unewmexico"
replace `x' = 193654 if `y' == "newschforsocialresearch"|`y'=="newschu"
replace `x' = 194152 if `y' == "newyorkstcollegeceramicsalfredu"
replace `x' = 193900 if `y' == "newyorkucourantimathematicalscience"|`y'=="newyorkumedcenter"|`y'=="newyorkuschmed"
replace `x' = 187967 if `y' == "nmeximiningtech"
replace `x' = 200332 if `y' == "northdakotastu"|`y'=="northdakotastufargo"
replace `x' = 147749 if `y' == "northwesternuchicagocampus"|`y'=="northwesternuchicago"
replace `x' = 152080 if `y' == "notredameuof"
replace `x' = 136215 if `y' == "novaseuoceancenter"
replace `x' = 204796 if `y' == "ohiostu"|`y'=="ohiosturesearchfoundation"|`y'=="theohiostu"
replace `x' = 204857 if `y' == "ohiou"
replace `x' = 207388 if `y' == "oklahomastu"
replace `x' = 207430 if `y' == "oklahomauof"|`y'=="oklauhealthsci"|`y'=="uoklahoma"
replace `x' = 209551 if `y' == "oregonuof"|`y'=="uoregoneugene"
replace `x' = 228778 if `y' == "outexasaustin"|`y'=="texasuaustin"|`y'=="utexasaustin"
replace `x' = 214777 if `y' == "pennsylvaniastu"|`y'=="thepennsylvaniastu"|`y'=="pennsylvaniastuupark"|`y'=="pennstcollegemed"|`y'=="pennstuupark"|`y'=="pennstuhershymed"|`y'=="pennstuhersheymedcntr"|`y'=="pennstugreatvalley"|`y'=="pennstucapitalcollege"|`y'=="pennstu"|`y'=="pennsylvaniastucommonwealthcollege"|`y'=="schreyerhonorscollegepennsylvaniastu"
replace `x' = 214227 if `y' == "pennsylvaniastberkslehighvalleycollege"
replace `x' = 215293 if `y' == "pittsburghuof"|`y'=="upittsburgh"
replace `x' = 194541 if `y' == "polyunewyork"
replace `x' = 152266 if `y' == "purdueunorthcenteral"
replace `x' = 243780 if `y' == "purdueuscheng"
replace `x' = 217484 if `y' == "rhodeislanduof"|`y'=="urhodeislandschoceanograph"
replace `x' = 195030 if `y' == "rochesteruof"|`y'=="urochestermedsch"
replace `x' = 186371 if `y' == "rutgersstunewark"|`y'=="rutgersstunewbrunswick"|`y'=="rutgersu"|`y'=="rutgersunewark"|`y'=="rutgersunewbrunswick"|`y'=="rutgersstunewjerseynewbrunswickcampus"|`y'=="rutgersstunewjerseynewarkcampus"|`y'=="rutgersnewbrunswickumeddentistrynewjerseypiscataway"
replace `x' = 122436 if `y' == "sandiegouof"
replace `x' = 122409 if `y' == "sandiegostufoundation"|`y'=="sandiegostuucaliforniaorniadavis"
replace `x' = 435338 if `y' == "lscrippsresearchi"|`y'=="thescrippsresearchi"|`y'=="scrippsresi"|`y'=="thescrippsresearchiflorida"|`y'=="thescrippsresearchikelloggschsciencetech"
replace `x' = 219347 if `y' == "sdschminestech"
replace `x' = 218663 if `y' == "southcarolinauof"|`y'=="usouthcarolina"|`y'=="usouthcarolinaschmed"
replace `x' = 123961 if `y' == "southerncaliforniauof"|`y'=="usoutherncallosangeles"
replace `x' = 149222 if `y' == "southernillinoisu"
replace `x' = 228246 if `y' == "sthrnmethodistu"
replace `x' = 179159 if `y' == "stlouisu"
replace `x' = 196255 if `y' == "stunewyork"|`y'=="theuthestnewyork"
replace `x' = 196282 if `y' == "stunystonybrook"|`y'=="sunystbrkhthsci"|`y'=="sunystonybrook"
replace `x' = 196103 if `y' == "sunyenvironscienceforestry"
replace `x' = 196307 if `y' == "sunyupstmedu"
replace `x' = 221759 if `y' == "tennuknoxville"|`y'=="utennessee"
replace `x' = 228723 if `y' == "texasamumaincampus"|`y'=="ustexasamu"
replace `x' = 229090 if `y' == "texasudallas"|`y'=="texasuelpaso"
replace `x' = 195049 if `y' == "therockefelleru"
replace `x' = 199139 if `y' == "theunorthcarolinacharlotte"
replace `x' = 221759 if `y' == "theutennesseememphishealthsciencecenter"
replace `x' = 216366 if `y' == "thomasjeffrsonu"
replace `x' = 168148 if `y' == "tuftsuschmed"
replace `x' = 160755 if `y' == "tulaneu"|`y'=="tulaneula"|`y'=="tulaneuschmed"
replace `x' = 200800 if `y' == "uakron"
replace `x' = 100733 if `y' == "ualabama"|`y'=="ualabamatuscaloosa"
replace `x' = 100706 if `y' == "ualabamahuntsville"|`y'=="theualabamainhuntsville"
replace `x' = 102614 if `y' == "ualaskafairbankscampus"|`y'=="ualaskafairbanksjuneaucampus"
replace `x' = 106397 if `y' == "uarkansasfayetteville"|`y'=="uarkansasmedscience"|`y'=="uarkansasmedsciencecampus"
replace `x' = 445188 if `y' == "ucmerced"
replace `x' = 145600 if `y' == "uillinois"
replace `x' = 176017 if `y' == "umississippi"
replace `x' = 181428 if `y' == "unebraskaomaha"
replace `x' = 200280 if `y' == "unorthdakotamaincampus"
replace `x' = 215062 if `y' == "upaschmed"|`y'=="upennsylvaniaschmed"|`y'=="upennsylvaniawhartonschfinancecom"
replace `x' = 251154 if `y' == "upuertoricomayaguez"|`y'=="prumayaguez"|`y'=="upuertoricoriopiedras"|`y'=="upuertoricomedsci"
replace `x' = 181002 if `y' == "uscreightonu"
replace `x' = 448840 if `y' == "usouthfloridastpetersburg"
replace `x' = 230764 if `y' == "utahuof"
replace `x' = 221759 if `y' == "utennesseehealthsciencecenter"|`y'=="utennesseeknoxville"|`y'=="utennesseespacei"
replace `x' = 229090 if `y' == "utexasarlington"|`y'=="utexasdallas"|`y'=="utexaselpaso"|`y'=="utexasmedbrchgalveston"
replace `x' = 416801 if `y' == "utexascancercenterhouston"|`y'=="utexashealthsciencecenterhouston"|`y'=="utexashealthscihouston"|`y'=="utexashealthsciencecentersanantonio"|`y'=="utexashealthscisanantonio"|`y'=="utexassanantonio"|`y'=="utexassouthwesternmedcenter"|`y'=="utexassouthwesternmedcenterdallas"|`y'=="utsouthwesternmedcenter"
replace `x' = 231174 if `y' == "uvermontburlington"|`y'=="uvermontstagcollege"|`y'=="vermontuof"
replace `x' = 234076 if `y' == "uvirginia"|`y'=="uvirginiacharlottesville"|`y'=="virginiauof"
replace `x' = 233921 if `y' == "uvirginiapolyistu"|`y'=="vapolyistu"|`y'=="vapolyitateu"|`y'=="virginiatech"|`y'=="virginiapolyitateun"
replace `x' = 236948 if `y' == "uwashington"|`y'=="uwashingtonschmed"|`y'=="washingtonuof"
replace `x' = 240055 if `y' == "uwisconsin"|`y'=="uwisconsinschmed"|`y'=="wisconsinumadison"
replace `x' = 234030 if `y' == "virginiacommonwealthumedcoll"
replace `x' = 199847 if `y' == "wakeforestubowmangrayschmed"
replace `x' = 236939 if `y' == "washingtonstuspokane"|`y'=="washingtonstuvancouver"
replace `x' = 179867 if `y' == "washingtonuinstlouisschmed"|`y'=="washingtonu"|`y'=="washingtonuschmed"
replace `x' = 238032 if `y' == "westvirginiauresearchcorporation"
replace `x' = 227757 if `y' == "williammarshriceu"
replace `x' = 166610 if `y' == "woodsholeoceanographic"|`y'=="woodsholeoceanographici"|`y'=="woodsholeoceanographici+mitjointprogram"|`y'=="woodsholeoceanographicibiologicaloceanography"|`y'=="woodsholeocni"
replace `x' = 206598 if `y' == "wrightstu"
replace `x' = 240727 if `y' == "wyominguof"
replace `x' = 197708 if `y' == "yeshivauaeieincollegemed"
replace `x' = 219471 if `y' == "usouthdakotamaincampus"
replace `x' = 194310 if `y' == "paceu"
replace `x' = 213020 if `y' == "indianaupennsylvania"
replace `x' = 206589 if `y' == "collegewooster"
replace `x' = 110422 if `y' == "californiaorniapolystu"
*** Additional list for NRC
replace `x' = 190576 if `y' == "cityunewyorkgradcenter"
replace `x' = -77 if `y' == "coldspringharbor"
replace `x' = 203517 if `y' == "kentstumaincampus"
replace `x' = 193405 if `y' == "mtsinaischmed"
replace `x' = 188030 if `y' == "newmexicostumaincampus"
replace `x' = 196060 if `y' == "stunewyorkalbany"
replace `x' = 196079 if `y' == "stunewyorkbinghamton"
replace `x' = 196130 if `y' == "stunewyorkbuffalo"
replace `x' = 196097 if `y' == "stunewyorkstonybrook"
replace `x' = 196255 if `y' == "stunewyorkhealthsciencecenterbrooklyn"
replace `x' = 196307 if `y' == "stunewyorkupstmedu"
replace `x' = 196413 if `y' == "syracuseumaincampus"
replace `x' = 448886 if `y' == "arizonastu"
replace `x' = 106397 if `y' == "uarkansasmaincampus"
replace `x' = 228909 if `y' == "unorthtexashealthsciencecenter"
replace `x' = 137351 if `y' == "usouthflorida"
replace `x' = 180461 if `y' == "montanastubozeman"
}
}
********************************************************************************
** REPLACE -33 FOR MULTIPLE UNIVERSITY AFFILIATION: 31 observations
********************************************************************************
** Flag cleaned university strings that name more than one institution.
** The hand-matched IPEDS ID is set to the sentinel -33 for these rows.
** inlist() accepts at most 9 string candidates, hence two statements.
set more off
replace ipeds_handmatch = -33 if inlist(uni_clns_to_match, ///
	"georgiastugeorgiatech", ///
	"sandiegostuucaliforniaorniasandiego", ///
	"sandiegostuucaliforniaorniasantabarbara", ///
	"ucberkeleyucsanfrancisco", ///
	"ucirvineucriversideucsandiego", ///
	"ucirvineucsandiego")
replace ipeds_handmatch = -33 if inlist(uni_clns_to_match, ///
	"ualabamabirminghamuabtheualabamainhuntsvilleuahualabamaua", ///
	"theualabamabirminghamuabtheualabamainhuntsvilleuah", ///
	"umarylandbaltimorecountyumdbaltimoreumdeasternshoreumdcollegepark", ///
	"umarylandbaltimorecountyumarylandbaltimore", ///
	"utexashealthsciencecntrhouston\utexasmdandersoncancercntruthsch\utmdacc", ///
	"georgiaitechemoryu")
** ipeds1: IPEDS ID of the first institution for each multi-university string.
** Missing for every other observation.
generate ipeds1 = .
label variable ipeds1 "NRC program affiliated with 2 universities, first institution"
replace ipeds1 = 139940 if uni_clns_to_match == "georgiastugeorgiatech"
replace ipeds1 = 122409 if inlist(uni_clns_to_match, ///
	"sandiegostuucaliforniaorniasandiego", ///
	"sandiegostuucaliforniaorniasantabarbara")
replace ipeds1 = 110635 if uni_clns_to_match == "ucberkeleyucsanfrancisco"
replace ipeds1 = 110653 if inlist(uni_clns_to_match, ///
	"ucirvineucriversideucsandiego", ///
	"ucirvineucsandiego")
replace ipeds1 = 100663 if inlist(uni_clns_to_match, ///
	"ualabamabirminghamuabtheualabamainhuntsvilleuahualabamaua", ///
	"theualabamabirminghamuabtheualabamainhuntsvilleuah")
replace ipeds1 = 163268 if inlist(uni_clns_to_match, ///
	"umarylandbaltimorecountyumdbaltimoreumdeasternshoreumdcollegepark", ///
	"umarylandbaltimorecountyumarylandbaltimore")
replace ipeds1 = 229300 if uni_clns_to_match == "utexashealthsciencecntrhouston\utexasmdandersoncancercntruthsch\utmdacc"
replace ipeds1 = 139755 if uni_clns_to_match == "georgiaitechemoryu"
** ipeds2: IPEDS ID of the second institution for each multi-university string.
** Missing for every other observation.
generate ipeds2 = .
label variable ipeds2 "NRC program affiliated with 2 universities, second institution"
replace ipeds2 = 139755 if uni_clns_to_match == "georgiastugeorgiatech"
replace ipeds2 = 110680 if inlist(uni_clns_to_match, ///
	"sandiegostuucaliforniaorniasandiego", ///
	"ucirvineucriversideucsandiego", ///
	"ucirvineucsandiego")
replace ipeds2 = 110705 if uni_clns_to_match == "sandiegostuucaliforniaorniasantabarbara"
replace ipeds2 = 110699 if uni_clns_to_match == "ucberkeleyucsanfrancisco"
replace ipeds2 = 100706 if inlist(uni_clns_to_match, ///
	"ualabamabirminghamuabtheualabamainhuntsvilleuahualabamaua", ///
	"theualabamabirminghamuabtheualabamainhuntsvilleuah")
replace ipeds2 = 163286 if uni_clns_to_match == "umarylandbaltimorecountyumdbaltimoreumdeasternshoreumdcollegepark"
replace ipeds2 = 163259 if uni_clns_to_match == "umarylandbaltimorecountyumarylandbaltimore"
replace ipeds2 = 416801 if uni_clns_to_match == "utexashealthsciencecntrhouston\utexasmdandersoncancercntruthsch\utmdacc"
replace ipeds2 = 139658 if uni_clns_to_match == "georgiaitechemoryu"

********************************************************************************
** IN TOTAL identified IPEDS for 4924 total observations (98.4%); 4892 with single ipeds ID (97.8%)
********************************************************************************
** Final IPEDS ID for the NRC list.
** Start from ipeds_id and override it with the hand-matched ID wherever a
** hand match exists (this includes the negative sentinel codes).
** The variable is created as a double from the start and labeled afterwards.
** Previously the label was attached to a float variable that was then dropped
** in favour of an unlabeled double copy, so the saved ipeds carried no label.
gen double ipeds = ipeds_id
replace ipeds = ipeds_handmatch if ipeds_handmatch !=.
format ipeds %12.0f
lab var ipeds "IPEDS ID, final match"
** Diagnostics: any ID, positive (single-institution) ID, and joint listings
count if ipeds !=.
count if ipeds !=. & ipeds > 0
count if ipeds1!=. 
count if ipeds2!=.
describe ipeds
save "$dir/NRC 05 Survey/NRC list with IPEDS.dta",replace
}
***** STEP 6: GRFP & NRC Program Crosswalk (respectively)
{
********************************************************************************
	** Clean up NRC Department list 
********************************************************************************
** Load the NRC list (with IPEDS IDs) and build a grouping of departments
** based on the lower-cased, whitespace-normalized department name.
clear all 
use "$dir/NRC 05 Survey/NRC list with IPEDS.dta"

rename nrc_broad_department nrc_broad_dept
rename nrc_department nrc_dept_full

** Sort on the full variable name. The previous "sort nrc_dept" ran before
** nrc_dept existed and only worked through variable-name abbreviation.
sort nrc_dept_full
gen nrc_dept = trim(itrim(lower(nrc_dept_full)))
egen nrc_dept_group = group(nrc_dept)
sort nrc_dept_group
** Within-group running index and group size
by nrc_dept_group: gen nrc_dept_counter = _n
by nrc_dept_group: gen nrc_dept_tally = _N
lab var nrc_dept_tally "number of obs among department list, derived from nrc_department"
lab var nrc_dept_group "NRC dept groups (62 total)"
** note: I used this list (in alphabetical order) to number the department IDs
** Fixed numeric department ID (1-62) keyed on the exact NRC department name.
** Names not listed here keep a missing ID. Statements are listed in ID order.
set more off
generate nrc_dept_id = .
replace nrc_dept_id = 1 if nrc_dept_full == "Aerospace Engineering"
replace nrc_dept_id = 2 if nrc_dept_full == "Agricultural and Resource Economics"
replace nrc_dept_id = 3 if nrc_dept_full == "American Studies"
replace nrc_dept_id = 4 if nrc_dept_full == "Animal Sciences"
replace nrc_dept_id = 5 if nrc_dept_full == "Anthropology"
replace nrc_dept_id = 6 if nrc_dept_full == "Applied Mathematics"
replace nrc_dept_id = 7 if nrc_dept_full == "Astrophysics and Astronomy"
replace nrc_dept_id = 8 if nrc_dept_full == "Biochemistry, Biophysics, and Structural Biology"
replace nrc_dept_id = 9 if nrc_dept_full == "Biology/Integrated Biology/Integrated Biomedical Sciences (Note: Use this field only if the degree field is not specialized.)"
replace nrc_dept_id = 10 if nrc_dept_full == "Biomedical Engineering and Bioengineering"
replace nrc_dept_id = 11 if nrc_dept_full == "Cell and Developmental Biology"
replace nrc_dept_id = 12 if nrc_dept_full == "Chemical Engineering"
replace nrc_dept_id = 13 if nrc_dept_full == "Chemistry"
replace nrc_dept_id = 14 if nrc_dept_full == "Civil and Environmental Engineering"
replace nrc_dept_id = 15 if nrc_dept_full == "Classics"
replace nrc_dept_id = 16 if nrc_dept_full == "Communication"
replace nrc_dept_id = 17 if nrc_dept_full == "Comparative Literature"
replace nrc_dept_id = 18 if nrc_dept_full == "Computer Engineering"
replace nrc_dept_id = 19 if nrc_dept_full == "Computer Sciences"
replace nrc_dept_id = 20 if nrc_dept_full == "Earth Sciences"
replace nrc_dept_id = 21 if nrc_dept_full == "Ecology and Evolutionary Biology"
replace nrc_dept_id = 22 if nrc_dept_full == "Economics"
replace nrc_dept_id = 23 if nrc_dept_full == "Electrical and Computer Engineering"
replace nrc_dept_id = 24 if nrc_dept_full == "Engineering Science and Materials (not elsewhere classified)"
replace nrc_dept_id = 25 if nrc_dept_full == "English Language and Literature"
replace nrc_dept_id = 26 if nrc_dept_full == "Entomology"
replace nrc_dept_id = 27 if nrc_dept_full == "Food Science"
replace nrc_dept_id = 28 if nrc_dept_full == "Forestry and Forest Sciences"
replace nrc_dept_id = 29 if nrc_dept_full == "French and Francophone Language and Literature"
replace nrc_dept_id = 30 if nrc_dept_full == "Genetics and Genomics"
replace nrc_dept_id = 31 if nrc_dept_full == "Geography"
replace nrc_dept_id = 32 if nrc_dept_full == "German Language and Literature"
replace nrc_dept_id = 33 if nrc_dept_full == "History"
replace nrc_dept_id = 34 if nrc_dept_full == "History of Art, Architecture and Archaeology"
replace nrc_dept_id = 35 if nrc_dept_full == "Immunology and Infectious Disease"
replace nrc_dept_id = 36 if nrc_dept_full == "Kinesiology"
replace nrc_dept_id = 37 if nrc_dept_full == "Languages, Societies and Cultures"
replace nrc_dept_id = 38 if nrc_dept_full == "Linguistics"
replace nrc_dept_id = 39 if nrc_dept_full == "Materials Science and Engineering"
replace nrc_dept_id = 40 if nrc_dept_full == "Mathematics"
replace nrc_dept_id = 41 if nrc_dept_full == "Mechanical Engineering"
replace nrc_dept_id = 42 if nrc_dept_full == "Microbiology"
replace nrc_dept_id = 43 if nrc_dept_full == "Music (except performance)"
replace nrc_dept_id = 44 if nrc_dept_full == "Neuroscience and Neurobiology"
replace nrc_dept_id = 45 if nrc_dept_full == "Nursing"
replace nrc_dept_id = 46 if nrc_dept_full == "Nutrition"
replace nrc_dept_id = 47 if nrc_dept_full == "Oceanography, Atmospheric Sciences and Meteorology"
replace nrc_dept_id = 48 if nrc_dept_full == "Operations Research, Systems Engineering and Industrial Engineering"
replace nrc_dept_id = 49 if nrc_dept_full == "Pharmacology, Toxicology and Environmental Health"
replace nrc_dept_id = 50 if nrc_dept_full == "Philosophy"
replace nrc_dept_id = 51 if nrc_dept_full == "Physics"
replace nrc_dept_id = 52 if nrc_dept_full == "Physiology"
replace nrc_dept_id = 53 if nrc_dept_full == "Plant Sciences"
replace nrc_dept_id = 54 if nrc_dept_full == "Political Science"
replace nrc_dept_id = 55 if nrc_dept_full == "Psychology"
replace nrc_dept_id = 56 if nrc_dept_full == "Public Affairs, Public Policy and Public Administration"
replace nrc_dept_id = 57 if nrc_dept_full == "Public Health"
replace nrc_dept_id = 58 if nrc_dept_full == "Religion"
replace nrc_dept_id = 59 if nrc_dept_full == "Sociology"
replace nrc_dept_id = 60 if nrc_dept_full == "Spanish and Portuguese Language and Literature"
replace nrc_dept_id = 61 if nrc_dept_full == "Statistics and Probability"
replace nrc_dept_id = 62 if nrc_dept_full == "Theatre and Performance Studies"
** note: although nrc_dept_id = nrc_dept_group, I included this code to ensure the department ID is consistent across different datasets
** 4892 total departments 
** Build the unique university/department key for the NRC data:
**   nrc_ipeds_dept_id = (IPEDS ID * 1000) + NRC department ID
** Only positive IPEDS IDs are used, so rows carrying a negative sentinel code
** or a missing IPEDS ID get a missing key and are dropped before saving.
gen double ipeds_to_id = ipeds if ipeds > 0 & ipeds !=.
format ipeds_to_id %12.0f
count if ipeds_to_id !=.
replace ipeds_to_id = ipeds_to_id * 1000
describe ipeds_to_id nrc_dept_id
** Store the department ID as a double so the sum below is computed in double precision
gen double nrc_dept_id1 = nrc_dept_id
format nrc_dept_id1 %12.0f
drop nrc_dept_id
** Use the explicit source name. The previous "rename nrc_dept_id nrc_dept_id"
** only worked because the dropped name abbreviated to nrc_dept_id1.
rename nrc_dept_id1 nrc_dept_id
gen double nrc_ipeds_dept_id = ipeds_to_id + nrc_dept_id
format nrc_ipeds_dept_id %12.0f
** note the 112 missing values do not have ipeds id (these programs were listed with multiple institutions)**
lab var nrc_ipeds_dept_id "Unique University/Dept ID (Ipeds+0+NRC dept)"
gen double ipeds_dept_id_to_merge = nrc_ipeds_dept_id
sort ipeds_dept_id_to_merge
drop if ipeds_dept_id_to_merge ==.
** NOTE(review): dup is not created anywhere in this section; presumably it
** already exists in the loaded NRC file - confirm, or tag duplicates of
** ipeds_dept_id_to_merge explicitly before counting.
count if dup > 1
** 974 values are ipeds_dept duplicates: this is where the program is unique but the University and Narrow field overlap. 
** Would it be ideal to clean up the NRC data HERE rather than later on? This way we can merge based on unique value.
** Note, with do-file V_A we now clean up the NRC dup data before merging with GRFP. This allows for 1:1 merge on university department.

save "$dir/NRC 05 Survey/NRC list with IPEDS & dept clean.dta", replace
/*******************************************************************************
Note: this database is the original NRC file. It now has the IPEDS ID & Unique Department ID.
In addition, this dataset includes a unique university/department id (ipeds_dept_id_to_merge).
Use this variable to merge with other databases.
*******************************************************************************/

********************************************************************************
	** Clean up GRFP Department list "$dir/GRFP/GRFP list with uni & cur IPEDS final.dta" 
********************************************************************************
** Load the GRFP list and derive two department strings:
**   grfp_dept_full     - lower-cased, whitespace-normalized department name
**   grfp_dept_to_clean - the same string with punctuation and all blanks removed
clear all 
use "$dir/GRFP/GRFP list with uni & cur IPEDS final.dta"
** Interactive inspection only: browse opens the Data Editor, so it is
** disabled to let the build run unattended.
*br department
sort department
rename department grfp_dept

** Clean up department name
gen grfp_dept_full = trim(itrim(lower(grfp_dept)))
gen grfp_dept_to_clean = grfp_dept_full

set more off
** Hyphens are intentionally kept (removal left disabled).
*replace grfp_dept_to_clean = subinstr(grfp_dept_to_clean, "-", "", .)
replace grfp_dept_to_clean = subinstr(grfp_dept_to_clean, ",", "", .)
replace grfp_dept_to_clean = subinstr(grfp_dept_to_clean, "/", "", .)
replace grfp_dept_to_clean = subinstr(grfp_dept_to_clean, "(", "", .)
replace grfp_dept_to_clean = subinstr(grfp_dept_to_clean, ")", "", .)
replace grfp_dept_to_clean = subinstr(grfp_dept_to_clean, ".", "", .)
replace grfp_dept_to_clean = subinstr(grfp_dept_to_clean, " ", "", .)

********************************************************************************
** Pull out those with unique combination of strings
********************************************************************************
** Assign an NRC department ID (grfp_dept_id) to each GRFP record by matching
** substrings of the cleaned department string (grfp_dept_to_clean).
** The rules are FIRST MATCH WINS: every rule after the first only fires while
** the ID is still missing, so the order of the statements decides the result.
** Do not reorder rules without re-checking the tabulation of grfp_dept_id.
** The value -99 marks departments reported as N/A.
gen grfp_dept_id = .

** Aerospace Engineering 1
foreach x in grfp_dept_id {
foreach y in grfp_dept_to_clean {
set more off
** The first rule has no missing-ID guard; the ID is entirely missing at this point.
replace `x' = 1 if regexm(`y',"^[a]")&regexm(`y',"[e][n][g]")
replace `x' = 1 if regexm(`y',"[a][e][r][o]")&`x'==.
replace `x' = 1 if regexm(`y',"[v][e][h][i][c]")&`x'==.
replace `x' = 1 if regexm(`y',"[a][s][t][r][o][d]")&`x'==.
replace `x' = 1 if regexm(`y',"[p][r][o][p][u]")&`x'==.
** Agricultural and resource economics 2
replace `x' = 2 if regexm(`y',"^[a]")&regexm(`y',"[e][c][o][n]")&`x'==.
replace `x' = 2 if regexm(`y',"^[r]")&regexm(`y',"[e][c][o][n]")&`x'==.
** Animal Science 4: "anim"
replace `x' = 4 if regexm(`y',"^[a][n][i][m]")&`x'==.
replace `x' = 4 if regexm(`y',"[a][n][i][m][a]")&`x'==.
replace `x' = 4 if regexm(`y',"[a][q][u][a][c][u]")&`x'==.
replace `x' = 4 if regexm(`y',"[m][a][r][i][n]")&`x'==.
replace `x' = 4 if regexm(`y',"[f][i][s][h]")&`x'==.
replace `x' = 4 if regexm(`y',"[d][a][i][r][y]")&`x'==.
replace `x' = 4 if regexm(`y',"[p][o][u][l][t]")&`x'==.
replace `x' = 4 if regexm(`y',"[z][o][o]")&`x'==.
replace `x' = 4 if regexm(`y',"[a][v][i][a][n]")&regexm(`y',"[s][c][i]")&`x'==.
** Entomology 26
replace `x' = 26 if regexm(`y',"[e][n][t][o]")&`x'==.
** NOTE(review): the pattern below spells "anthropod"; possibly "arthropod" was intended - confirm.
replace `x' = 26 if regexm(`y',"[a][n][t][h][r][o][p][o][d]")&`x'==.
** Applied Mathematics 6
replace `x' = 6 if regexm(`y',"^[a]")&regexm(`y',"[m][a][t][h]")&`x'==.
replace `x' = 6 if regexm(`y',"[c][o][n][t][r][o][l]")&regexm(`y',"[t][h][e]")&`x'==.
replace `x' = 6 if regexm(`y',"[d][y][n][a][m][i][c]")&regexm(`y',"[s][y][s]")&`x'==.
replace `x' = 6 if regexm(`y',"[d][y][n][a][m][i][c]")&regexm(`y',"[a][p][p]")&`x'==.
replace `x' = 6 if regexm(`y',"[d][i][f][f]")&regexm(`y',"[e][q]")&`x'==.
replace `x' = 6 if regexm(`y',"[l][i][n][e][a][r]")&`x'==.
replace `x' = 6 if regexm(`y',"[n][u][m][e][r][i][c][a][l]")&`x'==.
** Astrophysics & Astronomy 7
replace `x' = 7 if regexm(`y',"^[a]")&regexm(`y',"[p][h][y][s]")&`x'==.
replace `x' = 7 if regexm(`y',"[a][s][t][r]")&regexm(`y',"[p][h][y][s]")&`x'==.
replace `x' = 7 if regexm(`y',"[p][r][o][c]")&regexm(`y',"[p][h][y][s]")&`x'==.
replace `x' = 7 if regexm(`y',"[i][n][s][t][r][u][m]")&`x'==.
replace `x' = 7 if regexm(`y',"[s][o][l][a][r]")&`x'==.
replace `x' = 7 if regexm(`y',"[s][t][a][r]")&`x'==.
replace `x' = 7 if regexm(`y',"[s][t][e][l][l][a][r]")&`x'==.
replace `x' = 7 if regexm(`y',"[g][a][l][a][x]")&`x'==.
replace `x' = 7 if regexm(`y',"[c][o][s][m][o][l]")&`x'==.
replace `x' = 7 if regexm(`y',"^[a][s][t][r][o][n]")&`x'==.
** Nutrition 46
replace `x' = 46 if regexm(`y',"[n][u][t][r]")&`x'==.
** Biochemistry, Biophysics and Structural Biology 8
replace `x' = 8 if regexm(`y',"^[b][i][o][c][h][e][m]")&`x'==.
replace `x' = 8 if regexm(`y',"[b][i][o][c][h][e][m]")&`x'==.
replace `x' = 8 if regexm(`y',"^[b][i][o][p][h][y][s]")&`x'==.
replace `x' = 8 if regexm(`y',"^[b]")&regexm(`y',"[p][h][y][s]")&`x'==.
replace `x' = 8 if regexm(`y',"[b][i][o][p][h][y][s]")&`x'==.
replace `x' = 8 if regexm(`y',"[s][t][r][u]")&regexm(`y',"[b][i][o]")&`x'==.
replace `x' = 8 if regexm(`y',"[m][o][l]")&regexm(`y',"[b][i][o]")&`x'==.
** Biomedical Engineering and Bioengineering 10
replace `x' = 10 if regexm(`y',"[b][i][o][m][e][d]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o][e][n]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o]")&regexm(`y',"[m][e][d]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o]")&regexm(`y',"[e][l][e]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o]")&regexm(`y',"[i][m][a]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o]")&regexm(`y',"[m][a][t]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o]")&regexm(`y',"[m][e][c]")&`x'==.
replace `x' = 10 if regexm(`y',"[b][i][o]")&regexm(`y',"[t][r][a]")&`x'==.
replace `x' = 10 if regexm(`y',"[e][n][g]")&regexm(`y',"[t][i][s][s]")&`x'==.
replace `x' = 10 if regexm(`y',"[e][n][g]")&regexm(`y',"[c][e][l][l]")&`x'==.
** Chemical Engineering 12
replace `x' = 12 if regexm(`y',"[c][h][e][m][i][c]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 12 if regexm(`y',"^[e]")&regexm(`y',"[c][h][e][m]")&`x'==.
replace `x' = 12 if regexm(`y',"[c][a][t]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 12 if regexm(`y',"[c][o][m]")&regexm(`y',"[f][l][u]")&`x'==.
replace `x' = 12 if regexm(`y',"[m][e][m][b]")&regexm(`y',"[s][c][i]")&`x'==.
replace `x' = 12 if regexm(`y',"[t][r][a][n][s]")&regexm(`y',"[p][h][e]")&`x'==.
replace `x' = 12 if regexm(`y',"[p][e][t][r][o][l]")&`x'==.
replace `x' = 12 if regexm(`y',"[t][h][e][r][m][o][d]")&`x'==.
** Civil and Environmental Engineering 14
replace `x' = 14 if regexm(`y',"[c][i][v]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 14 if regexm(`y',"^[e]")&regexm(`y',"[c][i][v]")&`x'==.
replace `x' = 14 if regexm(`y',"^[e]")&regexm(`y',"[e][n][v]")&`x'==.
replace `x' = 14 if regexm(`y',"[c][o][n][s]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 14 if regexm(`y',"[g][e][o][t]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 14 if regexm(`y',"[e][n][v]")&regexm(`y',"[e][n][g]")&`x'==.
** Communication 16
replace `x' = 16 if regexm(`y',"[c][o][m][m][u]")&`x'==.
replace `x' = 16 if regexm(`y',"[b][r][o][a][d][c][a][s][t]")&`x'==.
replace `x' = 16 if regexm(`y',"[j][o][u][r][n][a]")&`x'==.
replace `x' = 16 if regexm(`y',"[s][p][e][e][c][h]")&`x'==.
replace `x' = 16 if regexm(`y',"[p][u][b]")&regexm(`y',"[r][e][l]")&`x'==.
** Computer Engineering 18
replace `x' = 18 if regexm(`y',"[c][o][m][p]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 18 if regexm(`y',"^[e][-]")&regexm(`y',"[c][o][m][p]")&`x'==.
replace `x' = 18 if regexm(`y',"[c][o][m][p]")&regexm(`y',"[a][r][c]")&`x'==.
replace `x' = 18 if regexm(`y',"[d][i][g][i]")&regexm(`y',"[c][i][r]")&`x'==.
replace `x' = 18 if regexm(`y',"[d][a][t][a]")&regexm(`y',"[s][t][o][r]")&`x'==.
replace `x' = 18 if regexm(`y',"[h][a][r][d][w][a][r][e]")&`x'==.
replace `x' = 18 if regexm(`y',"[r][o][b][o]")&`x'==.
** Electrical & Computer Engineering 23
replace `x' = 23 if regexm(`y',"[e][l][e]")&regexm(`y',"[c][s]")&`x'==.
*replace `x' = 23 if regexm(`y',"[c][o][m][p]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 23 if regexm(`y',"[s][e][m][i]")&regexm(`y',"[m][a][n]")&`x'==.
replace `x' = 23 if regexm(`y',"[c][o][n][t]")&regexm(`y',"[t][h][e]")&`x'==.
replace `x' = 23 if regexm(`y',"[n][a][n][o]")&regexm(`y',"[f][a][b]")&`x'==.
replace `x' = 23 if regexm(`y',"[s][i][g][n][a]")&regexm(`y',"[p][r][o]")&`x'==.
replace `x' = 23 if regexm(`y',"[e][l][e][c][t][r][o][m][a]")&`x'==.
replace `x' = 23 if regexm(`y',"[v][l][s][i]")&`x'==.
replace `x' = 23 if regexm(`y',"[c][i][r][c][u][i][t][s]")&`x'==.
replace `x' = 23 if regexm(`y',"^[e]")&regexm(`y',"[e][l][e]")&`x'==.
replace `x' = 23 if regexm(`y',"[e][n][g]")&regexm(`y',"[e][n][e]")&`x'==.
** Earth Sciences 20
replace `x' = 20 if regexm(`y',"[e][a][r][t][h]")&`x'==.
replace `x' = 20 if regexm(`y',"[m][i][n]")&regexm(`y',"[p][h][y]")&`x'==.
replace `x' = 20 if regexm(`y',"[e][n][v]")&regexm(`y',"[s][c][i]")&`x'==.
replace `x' = 20 if regexm(`y',"[c][o][s][m][o][c]")&`x'==.
replace `x' = 20 if regexm(`y',"[g][e][o][l][o]")&`x'==.
replace `x' = 20 if regexm(`y',"[g][e][o][p][h]")&`x'==.
replace `x' = 20 if regexm(`y',"[p][a][l][e][o][b]")&`x'==.
replace `x' = 20 if regexm(`y',"[p][a][l][e][o][n]")&`x'==.
replace `x' = 20 if regexm(`y',"[v][u][l][c]")&`x'==.
replace `x' = 20 if regexm(`y',"[t][e][c][t]")&`x'==.
replace `x' = 20 if regexm(`y',"[s][e][i][s]")&`x'==.
replace `x' = 20 if regexm(`y',"[s][o][i][l]")&`x'==.
replace `x' = 20 if regexm(`y',"[g][l][a][c]")&`x'==.
replace `x' = 20 if regexm(`y',"[g][e][o][s][c][i]")&`x'==.
replace `x' = 20 if regexm(`y',"[h][y][d][r][o][s]")&`x'==.
replace `x' = 20 if regexm(`y',"^[g][e][o][c][h][e]")&`x'==.
replace `x' = 20 if regexm(`y',"[g][e][o][s][c][-]")&`x'==.
** Economics 22
replace `x' = 22 if regexm(`y',"[e][c][o][n]")&`x'==.
replace `x' = 22 if regexm(`y',"[g][r][o][w][t][h]")&regexm(`y',"[d][e][v]")&`x'==.
replace `x' = 22 if regexm(`y',"[i][n][d][u][s][t][r][i][a][l]")&regexm(`y',"[o][r][g]")&`x'==.
** Food Science 27
** (the wood, pulp and paper patterns are also assigned to ID 27 by the rules below)
replace `x' = 27 if regexm(`y',"[f][o][o][d]")&`x'==.
replace `x' = 27 if regexm(`y',"[w][o][o][d]")&`x'==.
replace `x' = 27 if regexm(`y',"[p][u][l][p]")&`x'==.
replace `x' = 27 if regexm(`y',"[p][a][p][e][r]")&`x'==.
** Forestry & Forest Sciences 28
replace `x' = 28 if regexm(`y',"[f][o][r][e]")&`x'==.
** Plant Sciences 53
replace `x' = 53 if regexm(`y',"[p][l][a][n][t]")&`x'==.
replace `x' = 53 if regexm(`y',"[a][g][r][o][n]")&`x'==.
replace `x' = 53 if regexm(`y',"[h][o][r][t][i]")&`x'==.
replace `x' = 53 if regexm(`y',"[b][i][o][i][n][f]")&`x'==.
replace `x' = 53 if regexm(`y',"[b][i][o]")&regexm(`y',"[t][e][c][h]")&`x'==.
replace `x' = 53 if regexm(`y',"[b][i][o]")&regexm(`y',"[s][y][s]")&`x'==.
replace `x' = 53 if regexm(`y',"[b][o][t][a][n]")&`x'==.
** Genetics & Genomics 30
replace `x' = 30 if regexm(`y',"[g][e][n][e]")&`x'==.
replace `x' = 30 if regexm(`y',"[g][e][n][o]")&`x'==.
replace `x' = 30 if regexm(`y',"[p][r][o][t][e][o]")&`x'==.
replace `x' = 30 if regexm(`y',"[c][o][m][p]")&regexm(`y',"[b][i][o]")&`x'==.
** Geography 31
replace `x' = 31 if regexm(`y',"[g][e][o][g]")&`x'==.
replace `x' = 31 if regexm(`y',"[n][a][t][u][r][e]")&`x'==.
** History 33
replace `x' = 33 if regexm(`y',"[h][i][s][t][o][r][y]")&`x'==.
replace `x' = 33 if regexm(`y',"[h][s][t][o][f]")&`x'==.
replace `x' = 33 if regexm(`y',"[p][h][l][o][f]")&`x'==.
** Immunology and Infectious Disease 35
replace `x' = 35 if regexm(`y',"[i][m][m][u]")&`x'==.
replace `x' = 35 if regexm(`y',"[p][r][o][p][h][y]")&`x'==.
replace `x' = 35 if regexm(`y',"[i][n][f][e]")&`x'==.
replace `x' = 35 if regexm(`y',"[p][a][t][h][o]")&`x'==.
replace `x' = 35 if regexm(`y',"[v][i][r][o][l]")&`x'==.
replace `x' = 35 if regexm(`y',"[p][a][r][a][s]")&`x'==.
replace `x' = 35 if regexm(`y',"[d][i][s][e][a][s][e]")&`x'==.
** Kinesiology 36
replace `x' = 36 if regexm(`y',"[k][i][n][e]")&`x'==.
** NOTE(review): the "biome" rule below also captures every still-unassigned
** string containing "biomet", so the later "biomet" rule for Statistics 61 never fires.
replace `x' = 36 if regexm(`y',"[b][i][o][m][e]")&`x'==.
replace `x' = 36 if regexm(`y',"[e][x][e][r]")&regexm(`y',"[p][h][y][s][i][o]")&`x'==.
replace `x' = 36 if regexm(`y',"[m][o][t][o][r]")&`x'==.
** Linguistics 38
replace `x' = 38 if regexm(`y',"[l][i][n][g]")&`x'==.
replace `x' = 38 if regexm(`y',"[l][a][n][g]")&`x'==.
replace `x' = 38 if regexm(`y',"[m][o][r][p][h][o]")&`x'==.
replace `x' = 38 if regexm(`y',"[p][h][o][n][e][t]")&`x'==.
replace `x' = 38 if regexm(`y',"[p][h][o][n][o][l]")&`x'==.
replace `x' = 38 if regexm(`y',"[s][e][m][a][n][t]")&`x'==.
replace `x' = 38 if regexm(`y',"[s][y][n][t][a][x]")&`x'==.
** Materials Science and Engineering 39
replace `x' = 39 if regexm(`y',"[m][e][t][a][l]")&`x'==.
replace `x' = 39 if regexm(`y',"[b][i][o][m][i]")&`x'==.
replace `x' = 39 if regexm(`y',"[m][a][t]")&regexm(`y',"[s][c][i]")&`x'==.
replace `x' = 39 if regexm(`y',"^[m][s]")&`x'==.
** Mechanical Engineering 41
replace `x' = 41 if regexm(`y',"^[e][-]")&regexm(`y',"[m][e][c]")&`x'==.
replace `x' = 41 if regexm(`y',"[m][e][c]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 41 if regexm(`y',"[m][e][c]")&regexm(`y',"[a][p][p]")&`x'==.
replace `x' = 41 if regexm(`y',"[e][n][e][r]")&regexm(`y',"[s][y][s]")&`x'==.
replace `x' = 41 if regexm(`y',"[h][e][a][t]")&regexm(`y',"[t][r][a]")&`x'==.
replace `x' = 41 if regexm(`y',"[a][c][o][u][s]")&`x'==.
replace `x' = 41 if regexm(`y',"[c][o][m][b][u]")&`x'==.
replace `x' = 41 if regexm(`y',"[t][r][i][b][o]")&`x'==.
** Microbiology 42
replace `x' = 42 if regexm(`y',"[m][i][c]")&regexm(`y',"[e][n][v]")&`x'==.
replace `x' = 42 if regexm(`y',"[m][i][c]")&regexm(`y',"[b][i][o]")&`x'==.
replace `x' = 42 if regexm(`y',"[b][a][c][t][e]")&`x'==.
** NOTE(review): the "patho" rule below repeats the pattern already used for
** Immunology 35, so it can never match a still-unassigned row.
replace `x' = 42 if regexm(`y',"[p][a][t][h][o]")&`x'==.
replace `x' = 42 if regexm(`y',"[m][i][c][r][o][b][i][a][l]")&`x'==.
** Nursing 45
replace `x' = 45 if regexm(`y',"[n][u][r][s]")&`x'==.
** Oceanography, Atmospheric Sciences and Meteorology 47
replace `x' = 47 if regexm(`y',"[o][c][e][a][n]")&`x'==.
replace `x' = 47 if regexm(`y',"[a][t][m][o][s]")&`x'==.
replace `x' = 47 if regexm(`y',"[m][e][t][e][o]")&`x'==.
replace `x' = 47 if regexm(`y',"[c][l][i][m]")&`x'==.
replace `x' = 47 if regexm(`y',"[f][r][e][s][h]")&`x'==.
** Operations Research, Systems Engineering and Industrial Engineering 48
replace `x' = 48 if regexm(`y',"^[e][-]")&regexm(`y',"[i][n][d]")&`x'==.
replace `x' = 48 if regexm(`y',"[o][p][e][r]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 48 if regexm(`y',"[o][p][e][r]")&regexm(`y',"[r][e][s]")&`x'==.
replace `x' = 48 if regexm(`y',"[s][y][s]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 48 if regexm(`y',"[i][n][d]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 48 if regexm(`y',"[e][r][g][o][n]")&`x'==.
** Pharmacology, Toxicology and Environmental Health 49
replace `x' = 49 if regexm(`y',"[p][h][a][r][m]")&`x'==.
replace `x' = 49 if regexm(`y',"[m][e][d][i][c][i][n][a][l]")&`x'==.
replace `x' = 49 if regexm(`y',"[t][o][x][i]")&`x'==.
replace `x' = 49 if regexm(`y',"[e][n][v]")&regexm(`y',"[h][e][a]")&`x'==.
** Physiology 52 (note exercise physiology is listed in bio 9)
** NOTE(review): the Kinesiology rules earlier in this loop assign "exer" + "physio" to 36, not 9 - confirm which is intended.
replace `x' = 52 if regexm(`y',"[p][h][y][s][i][o]")&`x'==.
** NOTE(review): the pattern below spells "endcri"; possibly "endocri" was intended - confirm.
replace `x' = 52 if regexm(`y',"[e][n][d][c][r][i]")&`x'==.
** Political Science 54
replace `x' = 54 if regexm(`y',"[p][o][l][i][t]")&`x'==.
replace `x' = 54 if regexm(`y',"[p][o][l]")&regexm(`y',"[s][c][i]")&`x'==.
replace `x' = 54 if regexm(`y',"[i][n][t][e][r]")&regexm(`y',"[r][e][l]")&`x'==.
replace `x' = 54 if regexm(`y',"[i][n][t][l]")&regexm(`y',"[r][e][l]")&`x'==.
** Psychology 55 (note, psychology of movement is listed in bio 9)
replace `x' = 9 if regexm(`y',"[p][s][y][c][h]")&regexm(`y',"[m][o][v]")&`x'==.
replace `x' = 55 if regexm(`y',"[p][s][y][c][h]")&`x'==.
replace `x' = 55 if regexm(`y',"[p][e][r][c][e][p]")&`x'==.
replace `x' = 55 if regexm(`y',"[p][e][r][s][o][n][a][l][i][t][y]")&`x'==.
** Public Affairs, Public Policy and Public Administration 56
replace `x' = 56 if regexm(`y',"[p][u][b]")&regexm(`y',"[a][d]")&`x'==.
replace `x' = 56 if regexm(`y',"[p][u][b]")&regexm(`y',"[p][o][l]")&`x'==.
replace `x' = 56 if regexm(`y',"[u][r][b]")&regexm(`y',"[a][f]")&`x'==.
replace `x' = 56 if regexm(`y',"[u][r][b]")&regexm(`y',"[r][e][g]")&`x'==.
replace `x' = 56 if regexm(`y',"[u][r][b]")&regexm(`y',"[s][t][u]")&`x'==.
replace `x' = 56 if regexm(`y',"[p][u][b]")&regexm(`y',"[a][f]")&`x'==.
replace `x' = 56 if regexm(`y',"[p][o][l][i][c][y]")&`x'==.
replace `x' = 56 if regexm(`y',"[a][d][m][i][n]")&`x'==.
replace `x' = 56 if regexm(`y',"[a][f][f]")&`x'==.
** Public Health 57
replace `x' = 57 if regexm(`y',"[p][u][b]")&regexm(`y',"[h][e][a]")&`x'==.
replace `x' = 57 if regexm(`y',"[b][i][o]")&regexm(`y',"[s][t][a][t]")&`x'==.
replace `x' = 57 if regexm(`y',"[e][p][i]")&`x'==.
** Sociology 59
replace `x' = 59 if regexm(`y',"[s][o][c][i][o]")&`x'==.
replace `x' = 59 if regexm(`y',"[s][o][c][l]")&`x'==.
*replace `x' = 59 if regexm(`y',"[s][o][c][i][a][l]")&`x'==.
replace `x' = 59 if regexm(`y',"[d][e][m][o][g]")&`x'==.
replace `x' = 59 if regexm(`y',"[w][o][r][k]")&`x'==.
replace `x' = 59 if regexm(`y',"[c][r][i][m][i][n]")&`x'==.
replace `x' = 59 if regexm(`y',"[g][e][n][d][e][r]")&`x'==.
replace `x' = 59 if regexm(`y',"[r][a][c][e]")&`x'==.
replace `x' = 59 if regexm(`y',"[e][t][h][n][i]")&`x'==.
replace `x' = 59 if regexm(`y',"[i][n][e][q][u][a][l]")&`x'==.
** Statistics and Probability 61
replace `x' = 61 if regexm(`y',"[s][t][a][t]")&`x'==.
replace `x' = 61 if regexm(`y',"[p][r][o][b]")&`x'==.
** NOTE(review): unreachable - rows containing "biomet" were already assigned 36 by the "biome" rule.
replace `x' = 61 if regexm(`y',"[b][i][o][m][e][t]")&`x'==.
********************************************************************************
** List these later since they are the more prominent fields
********************************************************************************
** Neuroscience and Neurobiology 44
replace `x' = 44 if regexm(`y',"[n][e][u][r]")&`x'==.
replace `x' = 44 if regexm(`y',"[n][u][e][r]")&`x'==.
** Anthropology 5: "anth"
replace `x' = 5 if regexm(`y',"^[a][n][t][h]")&`x'==.
replace `x' = 5 if regexm(`y',"[a][n][t][h][r][o]")&`x'==.
replace `x' = 5 if regexm(`y',"[a][r][c][h][a][e]")&`x'==.
replace `x' = 5 if regexm(`y',"[a][r][c][h][e][o]")&`x'==.
** Cell and Developmental Biology 11
replace `x' = 11 if regexm(`y',"[c][e][l][l]")&`x'==.
replace `x' = 11 if regexm(`y',"[d][e][v]")&regexm(`y',"[b][i][o]")&`x'==.
replace `x' = 11 if regexm(`y',"[c][a][n][c]")&regexm(`y',"[b][i][o]")&`x'==.
replace `x' = 11 if regexm(`y',"[a][n][a][t]")&`x'==.
** Ecology & Evolutionary Biology 21
replace `x' = 21 if regexm(`y',"[e][c][o][l]")&`x'==.
replace `x' = 21 if regexm(`y',"[e][v][o]")&`x'==.
replace `x' = 21 if regexm(`y',"[e][t][h][o][l]")&`x'==.
replace `x' = 21 if regexm(`y',"[p][o][p]")&regexm(`y',"[b][i][o]")&`x'==.
** Mathematics 40
replace `x' = 40 if regexm(`y',"[m][a][t][h]")&`x'==.
replace `x' = 40 if regexm(`y',"[a][l][g][e][b]")&`x'==.
replace `x' = 40 if regexm(`y',"[g][e][o][m][e]")&`x'==.
replace `x' = 40 if regexm(`y',"[c][o][m][b][a]")&`x'==.
replace `x' = 40 if regexm(`y',"[l][o][g][i][c]")&`x'==.
replace `x' = 40 if regexm(`y',"[d][y][n]")&regexm(`y',"[s][y][s]")&`x'==.
replace `x' = 40 if regexm(`y',"[n][u][m]")&regexm(`y',"[t][h][e]")&`x'==.
replace `x' = 40 if regexm(`y',"[s][e][t]")&regexm(`y',"[t][h][e]")&`x'==.
** Computer Sciences 19
replace `x' = 19 if regexm(`y',"[c][o][m][p]")&regexm(`y',"[s][c][i]")&`x'==.
replace `x' = 19 if regexm(`y',"[i][n][f][o]")&regexm(`y',"[s][y][s]")&`x'==.
replace `x' = 19 if regexm(`y',"[n][u][m]")&regexm(`y',"[a][n][a]")&`x'==.
replace `x' = 19 if regexm(`y',"[t][h][e]")&regexm(`y',"[a][l][g]")&`x'==.
replace `x' = 19 if regexm(`y',"[o][s]")&regexm(`y',"[n][e][t]")&`x'==.
replace `x' = 19 if regexm(`y',"^[c][s]")&`x'==.
** NOTE(review): the "ai" pattern below is unanchored, so it matches any
** still-unassigned string containing the letters "ai" anywhere.
replace `x' = 19 if regexm(`y',"[a][i]")&`x'==.
replace `x' = 19 if regexm(`y',"[a][r][t][i][f][i][c][i][a][l]")&`x'==.
replace `x' = 19 if regexm(`y',"[s][o][f][t]")&`x'==.
replace `x' = 19 if regexm(`y',"[p][r][o][g][r][a][m][m]")&`x'==.
** NOTE(review): the pattern below spells "complier"; possibly "compiler" was intended - confirm.
replace `x' = 19 if regexm(`y',"[c][o][m][p][l][i][e][r]")&`x'==.
replace `x' = 19 if regexm(`y',"[g][r][a][p][h][i][c]")&`x'==.
replace `x' = 19 if regexm(`y',"[i][n][t][e][r][f][a]")&`x'==.
** Biology/Integrated Biology/Integrated Biomedical Sciences (Note: Use this field only if the degree field is not specialized.) 9
replace `x' = 9 if regexm(`y',"[b][i][o]")&`x'==.
** Chemistry 13
replace `x' = 13 if regexm(`y',"^[c][h][e][m]")&`x'==.
replace `x' = 13 if regexm(`y',"[p][o][l][y][m][e][r]")&`x'==.
replace `x' = 13 if regexm(`y',"[o][r][g][a][n]")&`x'==.
replace `x' = 13 if regexm(`y',"[o][r][g][n]")&`x'==.
** Engineering Science and Materials (not elsewhere classified) 24
replace `x' = 24 if regexm(`y',"[m][e][c][h][a][n]")&regexm(`y',"[e][n][g]")&`x'==.
replace `x' = 24 if regexm(`y',"[m][e][c][h][a][n]")&regexm(`y',"[m][a][t]")&`x'==.
** Physics 51
replace `x' = 51 if regexm(`y',"[p][h][y][s][i][c][s]")&`x'==.
replace `x' = 51 if regexm(`y',"[g][r][a][v][i][t][y]")&`x'==.
replace `x' = 51 if regexm(`y',"[r][e][l][a][t][i][v]")&`x'==.
replace `x' = 51 if regexm(`y',"[d][y][n][a][m][i][c]")&regexm(`y',"[f][l][u][i][d]")&`x'==.
replace `x' = 51 if regexm(`y',"[d][y][n][a][m][i][c]")&regexm(`y',"[l][i][n][e][a]")&`x'==.
replace `x' = 51 if regexm(`y',"[s][t][r][i][n][g]")&`x'==.
replace `x' = 51 if regexm(`y',"[n][u][c][l][e][a]")&`x'==.
replace `x' = 51 if regexm(`y',"[o][p][t][i][c][s]")&`x'==.
replace `x' = 51 if regexm(`y',"[p][l][a][s][m][a]")&`x'==.
replace `x' = 51 if regexm(`y',"[q][u][a][n][t][u]")&`x'==.
replace `x' = 51 if regexm(`y',"[m][a][t][t]")&`x'==.
replace `x' = 51 if regexm(`y',"[c][o][n][d][m]")&`x'==.
replace `x' = 51 if regexm(`y',"[a][t][o][m][c]")&`x'==.
replace `x' = 51 if regexm(`y',"[p][h][y][s]")&`x'==.
********************************************************************************
** Extras to assign department
********************************************************************************
** Material Science (extra)
replace `x' = 39 if regexm(`y',"[m][a][t][e][r]")&`x'==.
** extras (N/A)
** -99 is given to every still-unassigned string that begins with "na"
replace `x' = -99 if regexm(`y',"^[n][a]")&`x'==.
replace `x' = 9 if regexm(`y',"[l][i][f][e]")&`x'==.
replace `x' = 24 if regexm(`y',"^[e][-]")&`x'==.
replace `x' = 24 if regexm(`y',"^[e][n][g]")&`x'==.
replace `x' = 19 if regexm(`y',"[c][s]")&`x'==.
replace `x' = 19 if regexm(`y',"[c][o][m][p][u]")&`x'==.
replace `x' = 41 if regexm(`y',"[m][e][c][h][a][n][c][l]")&`x'==.
replace `x' = 23 if regexm(`y',"[e][l][e][c][t][r][c][l]")&`x'==.
replace `x' = 20 if regexm(`y',"[e][n][v][i][r][o][n]")&`x'==.
replace `x' = 7 if regexm(`y',"[a][s][t][r][o][n]")&`x'==.
replace `x' = 5 if regexm(`y',"[a][n][t][h]")&`x'==.
replace `x' = 14 if regexm(`y',"[c][i][v][i]")&`x'==.
replace `x' = 13 if regexm(`y',"[c][h][e][m]")&`x'==.
}
}
* University Department Match Rate: NRC listing to GRFP Departments: 58136 out of 58218 (99.86 percent)
*538 reported N/A for the department out of the total amount. Thus have department ID for 57598 (98.94 percent)
** Match-rate diagnostics: rows with any assigned ID, then rows with a
** positive ID (negative codes such as -99 excluded), then the distribution.
set more off
count if grfp_dept_id != .
count if grfp_dept_id > 0 & grfp_dept_id != .

tabulate grfp_dept_id
save "$dir/GRFP/GRFP list with IPEDS & NRC dept.dta", replace

********************************************************************************
********************************************************************************
** Clean up variable 
** Create unique IPEDS/Dept ID
********************************************************************************
********************************************************************************
clear all
set more off
use "$dir/GRFP/GRFP list with IPEDS & NRC dept.dta"

** Condition selecting award years 2005-2008, reused in the counts below
local in_window "grfp_year > 2004 & grfp_year < 2009"

*1* IPEDS ID prep (Proposed Institution <ipeds> & Current Institution <ipeds_cur>)
** Positive IDs only, scaled by 1000 to leave room for the department ID.
foreach inst in ipeds ipeds_cur {
	generate double `inst'_to_id = `inst' if `inst' > 0 & `inst' !=.
	format `inst'_to_id %12.0f
	replace `inst'_to_id = `inst'_to_id * 1000
	count if `inst'_to_id !=. & `inst'_to_id > 0
	count if `inst'_to_id !=. & `inst'_to_id > 0 & `in_window'
	count if no_change == 1 & `in_window'
	** Proposed Institution: 10350 have proposed uni ID in timeframe (05-08); 57139 total observations (94-14) with proposed ipeds
	** Current Institution: 29854 total observations with current ipeds; 9426 in our timeframe (05 - 08)
	** No Change: 7171 where current = proposed from 05 - 08 
		* ~69.3 % match with proposed listing (= 7171/10350)
		* ~76.1 % match with current listing (= 7171/9426)
}

*2* GRFP department ID prep
** Positive department IDs only; negative codes and missing stay missing.
describe grfp_dept_id
summarize grfp_dept_id if grfp_dept_id > 0 & grfp_dept_id !=. 
** 620 do not have dept id; 57598 total observations with dept
generate double grfp_dept_to_id = grfp_dept_id if grfp_dept_id > 0 & grfp_dept_id !=.

*3* Create unique IPEDS_DEPT ID (for proposed and current institutions)
** Key = scaled IPEDS ID + department ID; missing if either part is missing.
foreach inst in ipeds ipeds_cur {
	generate double grfp_`inst'_dept_id = `inst'_to_id + grfp_dept_to_id
	format grfp_`inst'_dept_id %12.0f
	label variable grfp_`inst'_dept_id "Unique University/Dept ID (`inst'+0+NRC dept)"
	count if grfp_`inst'_dept_id !=. & grfp_`inst'_dept_id > 0
	count if grfp_`inst'_dept_id !=. & grfp_`inst'_dept_id > 0 & `in_window'
	** Proposed Institution: 100% match with department in the timeframe of interest (05-08); 97.1% total match rate (full sample) (=56527/58218)
	** Current Institution: 100% match with department in the timeframe of interest (05-08)
}

** UNIQUE IPEDS_DEPT ID: grfp_ipeds_dept_id (proposed institution) & grfp_ipeds_cur_dept_id (current institution)

* Attach the NRC field name to every GRFP department code (1-62).
* Codes outside 1-62 (missing, N/A) keep the placeholder ".".
set more off
gen nrc_dept_string = "."
replace nrc_dept_string = "Aerospace Engineering" if grfp_dept_id == 1
replace nrc_dept_string = "Agricultural and Resource Economics" if grfp_dept_id == 2
replace nrc_dept_string = "American Studies" if grfp_dept_id == 3
replace nrc_dept_string = "Animal Sciences" if grfp_dept_id == 4
replace nrc_dept_string = "Anthropology" if grfp_dept_id == 5
replace nrc_dept_string = "Applied Mathematics" if grfp_dept_id == 6
replace nrc_dept_string = "Astrophysics and Astronomy" if grfp_dept_id == 7
replace nrc_dept_string = "Biochemistry, Biophysics, and Structural Biology" if grfp_dept_id == 8
replace nrc_dept_string = "Biology/Integrated Biology/Integrated Biomedical Sciences (Note: Use this field only if the degree field is not specialized.)" if grfp_dept_id == 9
replace nrc_dept_string = "Biomedical Engineering and Bioengineering" if grfp_dept_id == 10
replace nrc_dept_string = "Cell and Developmental Biology" if grfp_dept_id == 11
replace nrc_dept_string = "Chemical Engineering" if grfp_dept_id == 12
replace nrc_dept_string = "Chemistry" if grfp_dept_id == 13
replace nrc_dept_string = "Civil and Environmental Engineering" if grfp_dept_id == 14
replace nrc_dept_string = "Classics" if grfp_dept_id == 15
replace nrc_dept_string = "Communication" if grfp_dept_id == 16
replace nrc_dept_string = "Comparative Literature" if grfp_dept_id == 17
replace nrc_dept_string = "Computer Engineering" if grfp_dept_id == 18
replace nrc_dept_string = "Computer Sciences" if grfp_dept_id == 19
replace nrc_dept_string = "Earth Sciences" if grfp_dept_id == 20
replace nrc_dept_string = "Ecology and Evolutionary Biology" if grfp_dept_id == 21
replace nrc_dept_string = "Economics" if grfp_dept_id == 22
replace nrc_dept_string = "Electrical and Computer Engineering" if grfp_dept_id == 23
replace nrc_dept_string = "Engineering Science and Materials (not elsewhere classified)" if grfp_dept_id == 24
replace nrc_dept_string = "English Language and Literature" if grfp_dept_id == 25
replace nrc_dept_string = "Entomology" if grfp_dept_id == 26
replace nrc_dept_string = "Food Science" if grfp_dept_id == 27
replace nrc_dept_string = "Forestry and Forest Sciences" if grfp_dept_id == 28
replace nrc_dept_string = "French and Francophone Language and Literature" if grfp_dept_id == 29
replace nrc_dept_string = "Genetics and Genomics" if grfp_dept_id == 30
replace nrc_dept_string = "Geography" if grfp_dept_id == 31
replace nrc_dept_string = "German Language and Literature" if grfp_dept_id == 32
replace nrc_dept_string = "History" if grfp_dept_id == 33
replace nrc_dept_string = "History of Art, Architecture and Archaeology" if grfp_dept_id == 34
replace nrc_dept_string = "Immunology and Infectious Disease" if grfp_dept_id == 35
replace nrc_dept_string = "Kinesiology" if grfp_dept_id == 36
replace nrc_dept_string = "Languages, Societies and Cultures" if grfp_dept_id == 37
replace nrc_dept_string = "Linguistics" if grfp_dept_id == 38
replace nrc_dept_string = "Materials Science and Engineering" if grfp_dept_id == 39
replace nrc_dept_string = "Mathematics" if grfp_dept_id == 40
replace nrc_dept_string = "Mechanical Engineering" if grfp_dept_id == 41
replace nrc_dept_string = "Microbiology" if grfp_dept_id == 42
replace nrc_dept_string = "Music (except performance)" if grfp_dept_id == 43
replace nrc_dept_string = "Neuroscience and Neurobiology" if grfp_dept_id == 44
replace nrc_dept_string = "Nursing" if grfp_dept_id == 45
replace nrc_dept_string = "Nutrition" if grfp_dept_id == 46
replace nrc_dept_string = "Oceanography, Atmospheric Sciences and Meteorology" if grfp_dept_id == 47
replace nrc_dept_string = "Operations Research, Systems Engineering and Industrial Engineering" if grfp_dept_id == 48
replace nrc_dept_string = "Pharmacology, Toxicology and Environmental Health" if grfp_dept_id == 49
replace nrc_dept_string = "Philosophy" if grfp_dept_id == 50
replace nrc_dept_string = "Physics" if grfp_dept_id == 51
replace nrc_dept_string = "Physiology" if grfp_dept_id == 52
replace nrc_dept_string = "Plant Sciences" if grfp_dept_id == 53
replace nrc_dept_string = "Political Science" if grfp_dept_id == 54
replace nrc_dept_string = "Psychology" if grfp_dept_id == 55
replace nrc_dept_string = "Public Affairs, Public Policy and Public Administration" if grfp_dept_id == 56
replace nrc_dept_string = "Public Health" if grfp_dept_id == 57
replace nrc_dept_string = "Religion" if grfp_dept_id == 58
replace nrc_dept_string = "Sociology" if grfp_dept_id == 59
replace nrc_dept_string = "Spanish and Portuguese Language and Literature" if grfp_dept_id == 60
replace nrc_dept_string = "Statistics and Probability" if grfp_dept_id == 61
replace nrc_dept_string = "Theatre and Performance Studies" if grfp_dept_id == 62

tabulate nrc_dept_string
*tab grfp_ipeds_dept_id

* Copy the unique university/department IDs into the variables used as merge keys
* (<ipeds_dept_id_to_merge> for proposed, <ipeds_cur_dept_id_to_merge> for current),
* then summarize each key for awards where current = proposed institution.
foreach inst in ipeds ipeds_cur {
	gen double `inst'_dept_id_to_merge = grfp_`inst'_dept_id
	sort `inst'_dept_id_to_merge
	sum `inst'_dept_id_to_merge if no_change == 1
}

* Checkpoint: award-level file consumed by the STEP 7 collapse
save "$dir/GRFP/GRFP list with IPEDS & NRC dept clean.dta", replace

* Build a one-row-per-department lookup (grfp_dept_id -> nrc_dept_string) for vetting.
keep grfp_dept_id nrc_dept_string
* Discard N/A (negative) and unassigned (missing) department codes
drop if grfp_dept_id < 0 | grfp_dept_id ==.
* dup = 0 for a department that appears once, 1..N for one that appears N times.
bysort grfp_dept_id: gen dup = cond(_N==1,0,_n)
* Keep the first row of every department. Filtering on dup == 1 alone would drop
* any department that occurs exactly once (dup == 0), so both cases are retained.
keep if dup <= 1
save "$dir/GRFP/GRFP dept list for vetting data.dta", replace
/*******************************************************************************
Note: the file "GRFP list with IPEDS & NRC dept clean.dta" is the original GRFP award-level file.
It now has the IPEDS ID & unique department ID for the proposed institution
*AND* the IPEDS CUR ID & unique department ID for the current institution.
In addition, this dataset includes a unique university/department ID for the proposed and current institutions
(ipeds_dept_id_to_merge & ipeds_cur_dept_id_to_merge).
Use these variables to merge with other databases.
*******************************************************************************/
}

***** STEP 7: Prep NSF GRFP data for university-program merge (collapse data)
{
/** Brief overview of main aims: 
Merge GRFP Award data to NRC. 
The GRFP data carry unique identifiers <ipeds_dept_id_to_merge & ipeds_cur_dept_id_to_merge>,
which are used to collapse the awards to the year/university/department level. 

The NRC file "$dir/NRC 05 Survey/NRC list with IPEDS & dept clean.dta" 
is at the university/department level: 5004 observations. This dataset has the same
unique identifier <ipeds_dept_id_to_merge>.

The unique identifier between GRFP & NRC is <id_merge>
**/

********************************************************************************
** Prepare GRFP data to merge: collapse from award level to year/university/department level
********************************************************************************
* Load the award-level GRFP file once and derive six collapsed year/university/department
* count files from it. Every collapse is wrapped in preserve/restore so that the
* award-level data are back in memory for the next one; without this, the first
* -collapse- destroys the variables the later sections need and the script cannot be
* run top-to-bottom (it previously relied on manually re-running the load step).
clear all 
set more off
use "$dir/GRFP/GRFP list with IPEDS & NRC dept clean.dta"
* One unit per award; summing this variable in -collapse- yields counts
gen double counter = 1
* Re-create grfp_year as a double with a 4-digit display format
gen double year = grfp_year
drop grfp_year
format year %4.0f
rename year grfp_year
/** as a side, generate social science department
gen ss_dept = 1 if grfp_dept_to_id==2|grfp_dept_to_id==5|grfp_dept_to_id==16|grfp_dept_to_id==22|grfp_dept_to_id==33|grfp_dept_to_id==38|grfp_dept_to_id==54|grfp_dept_to_id==55|grfp_dept_to_id==56|grfp_dept_to_id==59|grfp_dept_to_id==31
recode ss_dept (.=0)
*/
********************************************************************************
			** Collapse for ipeds (proposed institution) **			*note: full time frame
********************************************************************************
*1* Collapse *Honorable Mention* GRFP activity: 15,134 unique listings (range 1-27) 
preserve
keep if offered_award == 0
drop if ipeds_dept_id_to_merge == .
collapse (sum) counter, by (grfp_year ipeds_dept_id_to_merge)
rename counter grfp_yr_dept_mention
lab var grfp_yr_dept_mention "Annual count of GRFP Honorable Mention (only), per proposed university department"
sum grfp_yr_dept_mention
gen double id_merge = ipeds_dept_id_to_merge
format id_merge %12.0f
drop ipeds_dept_id_to_merge
sort id_merge
save "$dir/GRFP/GRFP collpased Honorable Mention list year_university_department.dta", replace
restore

*2* Collapse *Offered Award* GRFP activity: 11,925 unique listings (range 1-25)
preserve
keep if offered_award == 1
drop if ipeds_dept_id_to_merge == .
collapse (sum) counter, by (grfp_year ipeds_dept_id_to_merge)
rename counter grfp_yr_dept_award
lab var grfp_yr_dept_award "Annual count of GRFP Offered Award (only), per proposed university department"
sum grfp_yr_dept_award
gen double id_merge = ipeds_dept_id_to_merge
format id_merge %12.0f
drop ipeds_dept_id_to_merge
sort id_merge
save "$dir/GRFP/GRFP collpased Offered Award list year_university_department.dta", replace
restore

********************************************************************************
			** Collapse for ipeds_cur (current institution) **		*note: 05-14 time frame	(only when current is available)
********************************************************************************
*3* Collapse *Honorable Mention* GRFP activity: 8,896 unique listings (range 1-17) 
preserve
keep if offered_award == 0
drop if ipeds_cur_dept_id_to_merge == .
collapse (sum) counter, by (grfp_year ipeds_cur_dept_id_to_merge)
rename counter grfp_yr_cur_dept_mention
lab var grfp_yr_cur_dept_mention "Annual count of GRFP Honorable Mention (only), per current university department"
sum grfp_yr_cur_dept_mention
gen double id_merge = ipeds_cur_dept_id_to_merge
format id_merge %12.0f
drop ipeds_cur_dept_id_to_merge
sort id_merge
save "$dir/GRFP/GRFP collpased Honorable Mention list year_cur_university_department.dta", replace
restore

*4* Collapse *Offered Award* GRFP activity: 7,633 unique listings (range 1-15)
preserve
keep if offered_award == 1
drop if ipeds_cur_dept_id_to_merge == .
collapse (sum) counter, by (grfp_year ipeds_cur_dept_id_to_merge)
rename counter grfp_yr_cur_dept_award
lab var grfp_yr_cur_dept_award "Annual count of GRFP Offered Award (only), per current university department"
sum grfp_yr_cur_dept_award
gen double id_merge = ipeds_cur_dept_id_to_merge
format id_merge %12.0f
drop ipeds_cur_dept_id_to_merge
sort id_merge
save "$dir/GRFP/GRFP collpased Offered Award list year_cur_university_department.dta", replace
restore

********************************************************************************
	** Collapse for no_change (current institution = proposed institution) **	*note: 05-14 time frame	(only when current is available)
********************************************************************************
* NOTE(review): unlike sections 1-4, sections 5-6 do not drop awards whose
* ipeds_cur_dept_id_to_merge is missing; any such award with no_change == 1 ends up
* in a group with a missing id_merge. Left unchanged to keep the documented counts.
*5* Collapse *Honorable Mention* GRFP activity: 7,255 unique listings (range 1-16) 
preserve
keep if no_change == 1
keep if offered_award == 0
collapse (sum) counter, by (grfp_year ipeds_cur_dept_id_to_merge)
rename counter grfp_yr_nochange_dept_mention
lab var grfp_yr_nochange_dept_mention "Annual count of GRFP Honorable Mention (only), per university department CUR=PRO"
sum grfp_yr_nochange_dept_mention
gen double id_merge = ipeds_cur_dept_id_to_merge
format id_merge %12.0f
drop ipeds_cur_dept_id_to_merge
sort id_merge
save "$dir/GRFP/GRFP collpased Honorable Mention list year_nochange_university_department.dta", replace
restore

*6* Collapse *Offered Award* GRFP activity: 5,953 unique listings (range 1-13)
preserve
keep if no_change == 1
keep if offered_award == 1
collapse (sum) counter, by (grfp_year ipeds_cur_dept_id_to_merge)
rename counter grfp_yr_nochange_dept_award
lab var grfp_yr_nochange_dept_award "Annual count of GRFP Offered Award (only), per university department CUR=PRO"
sum grfp_yr_nochange_dept_award
gen double id_merge = ipeds_cur_dept_id_to_merge
format id_merge %12.0f
drop ipeds_cur_dept_id_to_merge
sort id_merge
save "$dir/GRFP/GRFP collpased Offered Award list year_nochange_university_department.dta", replace
restore

********************************************************************************
** Merge GRFP collapsed (long form) data back together
********************************************************************************

* A * Proposed institutions: combine the two collapsed (long form) count files,
*     then reshape to one row per university/department.
clear all 
set more off
use "$dir/GRFP/GRFP collpased Honorable Mention list year_university_department.dta"
sort grfp_year id_merge
merge 1:1 grfp_year id_merge using "$dir/GRFP/GRFP collpased Offered Award list year_university_department.dta"
	** 6,829 matched; 8,305 from master (not matched); 5,096 from using (not matched)

* Diagnostic: dup is 0 everywhere when each year/department pair occurs once
quietly bysort grfp_year id_merge: gen dup = cond(_N==1,0,_n)
sum dup
	** 20,230 total department-year observations
drop dup _merge

* A department-year present in only one of the two files has zero of the other kind
recode grfp_yr_dept_mention grfp_yr_dept_award (.=0)
gen id_proposed_uni_yr = 1

sort grfp_year id_merge
save "$dir/GRFP/GRFP collpased proposed institution.dta", replace

br grfp_year id_merge grfp_yr_dept_award grfp_yr_dept_mention 
describe grfp_year id_merge grfp_yr_dept_award grfp_yr_dept_mention 

* Short stubs so that -reshape wide- can append the year to each name
rename grfp_yr_dept_award grfp_award
rename grfp_yr_dept_mention grfp_mention
reshape wide grfp_award grfp_mention id_proposed_uni_yr, i(id_merge) j(grfp_year)
*reshape wide grfp_award grfp_mention, i(id_merge) j(grfp_year)
	* 3,964 departments with any GRFP activity for proposed institutions
gen id_proposed_uni = 1
sort id_merge
save "$dir/GRFP/GRFP proposed institution reshaped.dta", replace


* B * Current institutions: combine the two collapsed (long form) count files,
*     then reshape to one row per university/department.
clear all 
set more off
use "$dir/GRFP/GRFP collpased Honorable Mention list year_cur_university_department.dta"
sort grfp_year id_merge
merge 1:1 grfp_year id_merge using "$dir/GRFP/GRFP collpased Offered Award list year_cur_university_department.dta"
	** 3,804 matched; 5,092 from master (not matched); 3,829 from using (not matched)

* Diagnostic: dup is 0 everywhere when each year/department pair occurs once
quietly bysort grfp_year id_merge: gen dup = cond(_N==1,0,_n)
sum dup
	** 12,725 total department-year observations
drop dup _merge

* A department-year present in only one of the two files has zero of the other kind
recode grfp_yr_cur_dept_mention grfp_yr_cur_dept_award (.=0)
gen id_cur_uni_yr = 1
sort grfp_year id_merge
save "$dir/GRFP/GRFP collpased current institution.dta", replace 

br grfp_year id_merge grfp_yr_cur_dept_award grfp_yr_cur_dept_mention 
describe grfp_year id_merge grfp_yr_cur_dept_award grfp_yr_cur_dept_mention 

* Short stubs so that -reshape wide- can append the year to each name
rename grfp_yr_cur_dept_award grfp_cur_award
rename grfp_yr_cur_dept_mention grfp_cur_mention
reshape wide grfp_cur_award grfp_cur_mention id_cur_uni_yr, i(id_merge) j(grfp_year)
*reshape wide grfp_cur_award grfp_cur_mention, i(id_merge) j(grfp_year)
	* 4,017 departments with any GRFP activity for current institutions
gen id_current_uni = 1
sort id_merge
save "$dir/GRFP/GRFP current institution reshaped.dta", replace


* C * Current institutions where current = proposed: combine the two collapsed
*     (long form) count files, then reshape to one row per university/department.
clear all 
set more off
use "$dir/GRFP/GRFP collpased Honorable Mention list year_nochange_university_department.dta"
sort grfp_year id_merge
merge 1:1 grfp_year id_merge using "$dir/GRFP/GRFP collpased Offered Award list year_nochange_university_department.dta"
	** 3,208 matched; 4,047 from master (not matched); 2,745 from using (not matched)

* Diagnostic: dup is 0 everywhere when each year/department pair occurs once
quietly bysort grfp_year id_merge: gen dup = cond(_N==1,0,_n)
sum dup
	** 10,000 total department-year observations
drop dup _merge

* A department-year present in only one of the two files has zero of the other kind
recode grfp_yr_nochange_dept_mention grfp_yr_nochange_dept_award (.=0)
gen id_nochange_uni_yr = 1
sort grfp_year id_merge
save "$dir/GRFP/GRFP collpased nochange institution.dta", replace

br grfp_year id_merge grfp_yr_nochange_dept_award grfp_yr_nochange_dept_mention 
describe grfp_year id_merge grfp_yr_nochange_dept_award grfp_yr_nochange_dept_mention  

* Short stubs so that -reshape wide- can append the year to each name
rename grfp_yr_nochange_dept_award grfp_nochange_award
rename grfp_yr_nochange_dept_mention  grfp_nochange_mention
reshape wide grfp_nochange_award grfp_nochange_mention id_nochange_uni_yr, i(id_merge) j(grfp_year)
*reshape wide grfp_nochange_award grfp_nochange_mention, i(id_merge) j(grfp_year)
	* 2,941 departments with any GRFP activity for current institutions whose current = proposed
gen id_nochange_uni = 1
sort id_merge
save "$dir/GRFP/GRFP nochange institution reshaped.dta", replace

}
***** STEP 8: Prep NRC data for university-program merge 
{
* Load the NRC university/department list and restrict it to GRFP-eligible fields.
clear all 
set more off
* -use- accepts -clear-, not -replace-; memory is already empty after -clear all-,
* so no option is needed here.
use "$dir/NRC 05 Survey/NRC list with IPEDS & dept clean.dta"
sum nrc_dept_id ipeds_dept_id_to_merge
* id_merge is the key shared with the collapsed GRFP files
gen double id_merge = ipeds_dept_id_to_merge
format id_merge %12.0f
br nrc_dept_id id_merge

	*1* Clean up NRC list
* Begin with 4892 (97.76% of total) = 4892/5004
*Drop the humanities fields: 874 departments
drop if nrc_dept_id == 3|nrc_dept_id == 15|nrc_dept_id == 17|nrc_dept_id == 25|nrc_dept_id == 29|nrc_dept_id == 32|nrc_dept_id == 34|nrc_dept_id == 37|nrc_dept_id == 43|nrc_dept_id == 50|nrc_dept_id == 58|nrc_dept_id == 60|nrc_dept_id == 62
drop if nrc_broad_dept == "Humanities"
*Drop health (non-S&E fields): 94 departments
drop if nrc_dept_id == 45|nrc_dept_id == 36
** Leaving a total of 3951 departments from the NRC survey eligible for the GRFP program
* _merge and dup are carried over from the steps that built this file
drop _merge
** Assess dups: note that there are dups uni/dept in the database (diagnostic test, to be dealt with in code below)
sort id_merge
format nrc_uni nrc_dept_full %50s
drop dup
quietly by id_merge: gen dup = cond(_N==1,0,_n)
sum dup
sum dup if dup > 1 
** 612 observations are dups with uni & narrow field, yet the program is unique (15% of sample, =612/3951). This is addressed in section *3* below (DUPS of IPEDS_DEPT).
drop dup

	*2* Adjust variable names
* Rename the NRC program-size and ranking columns to short analysis names and label them.
* The rename statements are kept in their original order: several source names look
* truncated, and reordering could change how an abbreviated name resolves.

format ProgramName ProgramWebsite nrc_dept %50s
* Public/private indicator derived from the Control string
gen double uni_public = 1 if Control == "Public"
recode uni_public (.=0)
lab var uni_public "Public/Private University (binary, 1 = public)"
* RegionalCode keeps its name (the former self-rename was a no-op); only the label is set
lab var RegionalCode "1 NE, 2 MW, 3 SA, 4 SC, 5 W"
rename ProgramSizeQuartile ProgramSize_Q
lab var ProgramSize_Q "Program Size Quartile Ranking"

* "N/D" placeholder -> "." so the column can be destringed later
replace ProgramSize_Q = "." if ProgramSize_Q == "N/D"

rename RRankings5thPerce R_rank_5th_pct
lab var R_rank_5th_pct "R Ranking, 5th percent"
rename RRankings95thPercen R_rank_95th_pct
lab var R_rank_95th_pct "R Ranking, 95th percent"
rename SRankings5thPercent S_rank_5th_pct
lab var S_rank_5th_pct "S Ranking, 5th percent"
rename SRankings95thPercenti S_rank_95th_pct
lab var S_rank_95th_pct "S Ranking, 95th percent"
rename ResearchActivity5thPercenti res_act_5th_pct
lab var res_act_5th_pct "Research Activity, 5th percent"
rename ResearchActivity95thPercent res_act_95th_pct
* Label the 95th-percentile variable itself; labelling res_act_5th_pct here would
* overwrite its 5th-percent label and leave res_act_95th_pct unlabelled.
lab var res_act_95th_pct "Research Activity, 95th percent"
rename StudentSupportOutcomes student_support_5th_pct
lab var student_support_5th_pct "Student Support & Outcomes, 5th percent"
rename Q student_support_95th_pct
lab var student_support_95th_pct "Student Support & Outcomes, 95th percent"
rename Diversity5thPerc diversity_5th_pct
lab var diversity_5th_pct "Diversity, 5th percent"
rename Diversity95thPerc diversity_95th_pct
lab var diversity_95th_pct "Diversity, 95th percent"

* "Not Ranked" placeholder -> "." in every ranking column
foreach x in R_rank_5th_pct R_rank_95th_pct S_rank_5th_pct S_rank_95th_pct res_act_5th_pct res_act_95th_pct student_support_5th_pct student_support_95th_pct diversity_95th_pct diversity_5th_pct {
replace `x' = "." if `x' == "Not Ranked"
} 

* Rename the remaining NRC survey columns to short analysis names; labels are attached
* where the short name alone is not self-explanatory.
* NOTE(review): the source names look truncated and two-letter names such as AK, AX, AZ
* and BA are presumably spreadsheet column letters assigned at import -- confirm against
* the raw NRC file before reordering or editing any of these statements.

* Faculty research output and funding
rename AverageNumberofPublications avg_pubs_per_fac 
rename AverageCitationsperPublicatio avg_citations_per_pub
rename PercentofFacultywithGrants pct_fac_w_grants_06
rename AwardsperAllocatedFacultyMe fac_awards_06
* Student support and outcomes
rename PercentofFirstYearStudentsw pct_1styr_stu_full_support_05
lab var pct_1styr_stu_full_support_05 "Percent of First Year Students with Full Financial Support, Fall 2005"
rename AvgCompletionPercentage8Y avg_degree_completion
lab var avg_degree_completion "Avg. Completion Percentage: 8 Years or Less for Humanities; 6 Years or Less for Other Fields"
rename MedianTimetoDegreeFulland median_time_to_degree_06
lab var median_time_to_degree_06 "Median Time to Degree (Full- and Part-Time Graduates), 2006"
rename PercentwithAcademicPlans pct_w_academic_plans
rename CollectsDataAboutPostGraduat dept_collects_data_postgrad
* Diversity of faculty and students
rename NonAsianMinorityFacultyasa nonasian_minority_fac_pct
lab var nonasian_minority_fac_pct "Non-Asian Minority Faculty as a Percent of Total Core and New Domestic Faculty, 2006"
rename FemaleFacultyasaPercentofT female_fac_pct
lab var female_fac_pct "Female Faculty as a Percent of Total Core and New Faculty, 2006"
rename NonAsianMinorityStudentsasa nonasian_minority_stu_pct
rename FemaleStudentsasaPercentof female_stu_pct
lab var female_stu_pct "Female Students as a Percent of Total Students, Fall 2005"
rename InternationalStudentsasaPer intl_stu_pct
lab var intl_stu_pct " International Students as a Percent of Total Students, Fall 2005"
* Other program characteristics
rename AverageNumberofPhDsGradua avg_phd_grad
rename PercentofInterdisciplinaryFac pct_interdis_fac
rename AverageGREScores20042006 avg_GRE
lab var avg_GRE "Average GRE Scores, 2004-2006"
rename AK pct_1styr_w_ex_fellowship
lab var pct_1styr_w_ex_fellowship "Percent of First-Year Students with External Fellowships, 2005"
rename IsStudentWorkSpaceProvided stu_workspace
lab var stu_workspace "Is Student Work Space Provided? (1=Yes; 0=No)"
rename IsHealthInsuranceProvided  stu_hlt_ins_offer
rename NumberofStudentActivitiesMa num_stu_activity
lab var num_stu_activity "Number of Student Activities (Max=18)" 
* Faculty and enrollment size
rename TotalFaculty2006 tot_fac_06
rename NumberofAllocatedFaculty200  num_allocated_fac_06
rename AssistantProfessorsasaPercen pct_assist_fac
rename TenuredFacultyasaPercentof pct_tenured_fac
rename NumberofCoreandNewFaculty num_core_new_fac
rename NumberofStudentsEnrolledF num_stu_enrolled_05
lab var num_stu_enrolled_05 " Number of Students Enrolled, Fall 2005"
rename AverageAnnualFirstYearEnroll avg_annual_1styr_enroll
lab var avg_annual_1styr_enroll "Average Annual First Year Enrollment, 2002-2006"
* Fellowship / assistantship mix
rename PercentofStudentswithResearc pct_stu_res_fellow_05
rename PercentofStudentswithTeachin pct_stu_teach_fellow_05
rename AX pct_1styr_inst_fellow_alone
rename PercentofFirstYearStudents pct_1styr_combo
lab var pct_1styr_combo "Percent  of First Year Students with a Combination of Fellowships and Traineeships"
rename AZ pct_1styr_both
lab var pct_1styr_both "Percent of First Year Students with Both Internal Fellowships and Internal Assistantships"
rename BA pct_1styr_many_fellow
lab var pct_1styr_many_fellow "Percent of First Year Students with Multiple Internal Assistantships"
* Student activity items (stu_*), followed by their overall count
rename OrientationforNewGraduateStu stu_orientation
rename InternationalStudentOrientatio stu_intl_orientation
rename LanguageScreeningSupportPri stu_language_support
rename InstructioninWriting stu_writing_support
rename InstructioninStatistics stu_statistics_support
rename PrizesAwardsforTeachingor stu_prize_teach_res
rename AssistanceTraininginProposa stu_proposal_support
rename OncampusGraduateResearchConf stu_oncampus_grad_res_conf
rename FormalTraininginAcademicInte stu_academic_integrity_training
rename ActiveGraduateStudentAssociat stu_grad_assoc
rename StaffAssignedtotheGraduateS stu_staff_grad_assoc
rename FinancialSupportfortheGradua stu_finance_support_grad_assoc
rename PostedAcademicGrievanceProced stu_academic_greivance
rename DisputeResolutionProcedure stu_dispute_resolution
rename RegularGraduateProgramsDirect stu_grad_prog_meeting
rename AnnualReviewofAllEnrolledDo stu_annual_review_enrolled
rename OrganizedTrainingtoHelpStude stu_teacher_training
rename TravelSupporttoAttendProfess stu_travel_support
rename CountofStudentActivities stu_activities_count

* Label the student-activity items, which are coded 1-4.
foreach x in stu_travel_support stu_teacher_training stu_annual_review_enrolled stu_grad_prog_meeting stu_dispute_resolution stu_academic_greivance stu_finance_support_grad_assoc stu_staff_grad_assoc stu_grad_assoc stu_academic_integrity_training stu_oncampus_grad_res_conf stu_proposal_support stu_orientation stu_intl_orientation stu_language_support stu_writing_support stu_statistics_support stu_prize_teach_res {
lab var `x' "Student Activities: 1 institution offer; 2 program offer; 3 both; 4 neither"
}
* stu_activities_count comes from the source column CountofStudentActivities and is not
* one of the 1-4 coded items (it is excluded from the later 1-4 recode), so it gets its
* own label instead of the categorical one.
lab var stu_activities_count "Count of Student Activities"

* String columns that may hold a non-numeric placeholder instead of a value
global var avg_citations_per_pub pct_fac_w_grants_06 pct_1styr_stu_full_support_05 avg_degree_completion median_time_to_degree_06 pct_w_academic_plans dept_collects_data_postgrad nonasian_minority_fac_pct female_fac_pct nonasian_minority_stu_pct female_stu_pct intl_stu_pct pct_interdis_fac avg_GRE pct_1styr_w_ex_fellowship stu_hlt_ins_offer num_stu_activity tot_fac_06 pct_assist_fac pct_tenured_fac num_core_new_fac num_stu_enrolled_05 avg_annual_1styr_enroll pct_stu_res_fellow_05 pct_stu_teach_fellow_05 pct_1styr_inst_fellow_alone pct_1styr_combo pct_1styr_both pct_1styr_many_fellow stu_orientation stu_intl_orientation stu_language_support stu_writing_support stu_statistics_support stu_prize_teach_res stu_proposal_support stu_oncampus_grad_res_conf stu_academic_integrity_training stu_grad_assoc stu_staff_grad_assoc stu_finance_support_grad_assoc stu_academic_greivance stu_dispute_resolution stu_grad_prog_meeting stu_annual_review_enrolled stu_teacher_training stu_travel_support stu_activities_count

* Replace the three placeholders ("N/D", "Not Ranked", "N/R") with "." in one pass
set more off
foreach x in $var {
	replace `x' = "." if inlist(`x', "N/D", "Not Ranked", "N/R")
}

* The fellowship-mix columns additionally use "*" as a placeholder. Compare against the
* whitespace-trimmed copy so that padded values such as " * " are caught as well; the
* trimmed helper was previously generated but never used. The *_trim helpers are kept
* because they are dropped explicitly further below.
foreach x in pct_1styr_inst_fellow_alone pct_1styr_combo pct_1styr_both pct_1styr_many_fellow {
	gen `x'_trim = trim(itrim(`x'))
	replace `x' = "." if `x'_trim == "*"
}

* Convert every string variable that now holds only numeric content to numeric
set more off 
destring, replace

* Yes/no items coded -1 are recoded to 0
recode dept_collects_data_postgrad stu_workspace stu_hlt_ins_offer (-1=0)

* Remove string-cleaning and matching helpers that are no longer needed
drop nrc_uni_cleaned nrc_uni_clns nrc_group_clean nrc_group_clns nrc_group_original ///
	nrc_clns_counter uni_clns_to_match ipeds_uni_clean_full ipeds_uni_clean1 ///
	ipeds_uni_clean11 ipeds_uni_clean12 ipeds_uni_clean13 ipeds_uni_cleaned ///
	ipeds_uni_clns ipeds_handmatch nrc_dept_tally pct_1styr_inst_fellow_alone_trim ///
	pct_1styr_combo_trim pct_1styr_both_trim pct_1styr_many_fellow_trim

	*3* need to deal with DUPS of IPEDS_DEPT
* Collapse duplicate university/department rows into one row each, in two passes:
*   pass 1: rows identical on IPEDS-Dept ID, NRC broad field, NRC full field, NRC university name, Control and RegionalCode (dup1)
*   pass 2: rows identical on IPEDS-Dept ID only (dup2)
* Within each duplicate group the $meanvars are averaged and the $sumvars are summed,
* then all but the first row of the group are dropped.

quietly bysort ipeds_dept_id_to_merge nrc_broad_dept nrc_dept_full nrc_uni Control RegionalCode: gen dup1 = cond(_N==1, 0, _n)
quietly bysort ipeds_dept_id_to_merge: gen dup2 = cond(_N==1,0,_n)
sum dup1 if dup1>1
sum dup2 if dup2>1 
count if dup1!=dup2
	drop dup2
	*Will deal with duplicates by looking at dup1 THEN dup2*

	* Variable lists shared by both passes (defined once)
	global meanvars ProgramSize_Q R_rank_5th_pct R_rank_95th_pct S_rank_5th_pct S_rank_95th_pct res_act_5th_pct res_act_95th_pct student_support_5th_pct student_support_95th_pct diversity_5th_pct diversity_95th_pct avg_pubs_per_fac avg_citations_per_pub pct_fac_w_grants_06 pct_1styr_stu_full_support_05 avg_degree_completion median_time_to_degree_06 pct_w_academic_plans dept_collects_data_postgrad nonasian_minority_fac_pct female_fac_pct nonasian_minority_stu_pct female_stu_pct intl_stu_pct avg_phd_grad pct_interdis_fac avg_GRE pct_1styr_w_ex_fellowship stu_workspace stu_hlt_ins_offer num_stu_activity pct_assist_fac pct_tenured_fac avg_annual_1styr_enroll pct_stu_res_fellow_05 pct_stu_teach_fellow_05 pct_1styr_inst_fellow_alone pct_1styr_combo pct_1styr_both pct_1styr_many_fellow stu_orientation stu_intl_orientation stu_language_support stu_writing_support stu_statistics_support stu_prize_teach_res stu_proposal_support stu_oncampus_grad_res_conf stu_academic_integrity_training stu_grad_assoc stu_staff_grad_assoc stu_finance_support_grad_assoc stu_academic_greivance stu_dispute_resolution stu_grad_prog_meeting stu_annual_review_enrolled stu_teacher_training stu_travel_support stu_activities_count nrc_clns_tally groupid ipeds_year ipeds_id ipeds1 ipeds2 ipeds ipeds_to_id nrc_dept_id nrc_ipeds_dept_id uni_public
	global sumvars fac_awards_06 tot_fac_06 num_allocated_fac_06 num_core_new_fac num_stu_enrolled_05 

/*First Step: Get rid of duplicates across all 6 grouping variables by taking the AVERAGE of some numeric variables
and the SUM of some other numeric variables*/

	*Average of Some Variables*
	* -bysort- (not plain -by-) is required: the dup2 step above re-sorted the data on
	* ipeds_dept_id_to_merge alone, so the data are not sorted on the full key here.
	set more off
	foreach x in $meanvars {
		bysort ipeds_dept_id_to_merge nrc_broad_dept nrc_dept_full nrc_uni Control RegionalCode: egen double grp_stat_tmp = mean(`x')
		replace `x' = grp_stat_tmp
		drop grp_stat_tmp
	}
		
	*Sum of Some Variables*	
	* total(), missing keeps a group missing when every value in it is missing. Plain
	* sum()/total() returns 0 in that case, which silently turned every missing count
	* (including those of non-duplicated programs) into 0.
	foreach x in $sumvars {
		bysort ipeds_dept_id_to_merge nrc_broad_dept nrc_dept_full nrc_uni Control RegionalCode: egen double grp_stat_tmp = total(`x'), missing
		replace `x' = grp_stat_tmp
		drop grp_stat_tmp
	}

* Check: rows of a dup1 group must now be identical on all averaged/summed variables
quietly bysort ipeds_dept_id_to_merge nrc_broad_dept nrc_dept_full nrc_uni Control RegionalCode $meanvars $sumvars: gen dup3 = cond(_N==1, 0, _n)
	sum dup1 dup3
		*Stats are the same, so loop ran, and now can drop duplicates*
	drop dup3
	drop if dup1>1

/*Second Step: Get rid of duplicates across just Ipeds-Dept ID*/
quietly bysort ipeds_dept_id_to_merge: gen dup2 = cond(_N==1,0,_n)
sum dup2 if dup2>1
	*Two school-programs in NJ have duplicates, so run the same code as before and drop 2nd observation* 

	*Average of Some Variables* (only rows that belong to a duplicate group)
	set more off
	foreach x in $meanvars {
		bysort ipeds_dept_id_to_merge: egen double grp_stat_tmp = mean(`x') if dup2>0
		replace `x' = grp_stat_tmp if dup2>0
		drop grp_stat_tmp
	}
		
	*Sum of Some Variables* (only rows that belong to a duplicate group)
	foreach x in $sumvars {
		bysort ipeds_dept_id_to_merge: egen double grp_stat_tmp = total(`x') if dup2>0, missing
		replace `x' = grp_stat_tmp if dup2>0
		drop grp_stat_tmp
	}
	
* Check: rows of a dup2 group must now be identical on all averaged/summed variables
quietly bysort ipeds_dept_id_to_merge $meanvars $sumvars: gen dup4 = cond(_N==1, 0, _n)
	sum dup2 dup4
		*Stats are the same, so loop ran, and now can drop duplicates*
	drop dup4 
	drop if dup2>1
	

duplicates report ipeds_dept_id_to_merge
duplicates report ipeds_dept_id_to_merge nrc_broad_dept nrc_dept_full nrc_uni Control RegionalCode
	*Duplicates are taken care of now* 3339 departments (3951-3339=612 duplicate observations)
	drop dup1 dup2

	
** Recode student activity variables and student health insurance/student workspace so that variables are discretely 0 or 1, or 1,2,3,4**
* Averaging duplicate programs can leave fractional values; round them back to the
* original codes.
tab  stu_workspace stu_hlt_ins_offer, m
* Binary items: round at .5. Missing values compare as larger than any number in Stata,
* so the upper branch must exclude them explicitly or missing would be recoded to 1.
* stu_hlt_ins_offer is handled here as well, as the header comment states.
foreach x in stu_workspace stu_hlt_ins_offer {
	replace `x' = 0 if `x' < .5
	replace `x' = 1 if `x' >= .5 & `x' != .
}


* 1-4 coded activity items: round to the nearest code, leaving missing untouched
foreach x in stu_orientation stu_intl_orientation stu_language_support stu_writing_support stu_statistics_support stu_prize_teach_res stu_proposal_support stu_oncampus_grad_res_conf stu_academic_integrity_training stu_grad_assoc stu_staff_grad_assoc stu_finance_support_grad_assoc stu_academic_greivance stu_dispute_resolution stu_grad_prog_meeting stu_annual_review_enrolled stu_teacher_training stu_travel_support {
		set more off
		replace  `x' = 1 if `x' < 1.5 
		replace  `x' = 2 if `x' >= 1.5 & `x' < 2.5
		replace  `x' = 3 if `x' >= 2.5 & `x' < 3.5 
		replace  `x' = 4 if `x' >= 3.5 & `x' != .
		tab `x', m 
	} 
	

** Map ProgramSize_Q back onto the discrete values 1, 2, 3, or 4 **
* Cut points are 1.5, 2.5 and 3.5; system-missing (.) is left untouched.
set more off
tab ProgramSize_Q, m
replace ProgramSize_Q = cond(ProgramSize_Q < 1.5, 1, cond(ProgramSize_Q < 2.5, 2, cond(ProgramSize_Q < 3.5, 3, 4))) if ProgramSize_Q != .
tab ProgramSize_Q, m

	*check dups: dup should be 0 for every row once id_merge is unique
quietly bysort id_merge: gen dup = cond(_N==1,0,_n)
sum dup
drop dup

* Checkpoint: de-duplicated NRC cross-section, one row per university/department
save "$dir/NRC 05 Survey/NRC IPEDS dept cross-section no dups ready to merge.dta", replace
* WITHOUT DUPS: 3339 departments in the study (612 were duplicates and the data on the department has been accounted for)

********************************************************************************
* Attach the wide-form GRFP counts for proposed institutions to the NRC cross-section;
* id_merge identifies one row in each file.
clear all 
set more off
use "$dir/NRC 05 Survey/NRC IPEDS dept cross-section no dups ready to merge.dta"
merge 1:1 id_merge using "$dir/GRFP/GRFP proposed institution reshaped.dta"
*RESULTS* 3339 unique departments in NRC study; 3964 in using (=2114+1877)
*_merge == 3* 2087 matched 
*_merge == 2* 1877 from using not matched (47.3% of GRFP recipients not accounted for in the NRC survey) =1877/3964
*_merge == 1* 1252 from master not matched (37.49% of S&E-based NRC recipients not receive GRFP award) =1252/3339

****************************************
/*** Brief aside: assessment of the GRFP departments that did not match (_merge == 2) 
keep if _merge == 2
set more off
br id_merge
drop _merge
*split up id_merge 9 digit double variable: first 6 digits IPEDS id and last 2 digits field
gen assess_id = id_merge
tostring assess_id, generate(str_assess_id)
gen ipeds_uni_backout = substr(str_assess_id,1,6)
gen ipeds_dept_backout = substr(str_assess_id,-2,2)
destring ipeds_uni_backout ipeds_dept_backout, replace
rename ipeds_id ipeds_id1
rename ipeds_uni_backout ipeds_id
merge m:m ipeds_id using "$dir/IPEDS/IPEDS list clean round2.dta"
keep if _merge == 3
drop _merge
rename ipeds_dept_backout grfp_dept_id
sort grfp_dept_id
merge m:1 grfp_dept_id using "$dir/GRFP/GRFP dept list for vetting data.dta"
tab nrc_dept_string
tab uni_clns_to_match
drop dup
sort uni_clns_to_match
quietly by uni_clns_to_match:  gen dup = cond(_N==1,0,_n)
count if dup == 1 | dup == 0
** Primary fields (6): Public Affairs, Public Policy and Public Administration; Operations Research, Systems Engineering and Industrial Engineering; Mathematics; Engineering Science and Materials (not elsewhere classified); Communication; Biochemistry, Biophysics, and Structural Biology
** Those where _merge == 2 come from a large range of universities (373). 
drop _merge
****************************************/

* Keep NRC departments only (drop GRFP-only records) and flag the ones that matched a GRFP record
drop if _merge == 2
generate any_grfp_prop = (_merge == 3)
label variable any_grfp_prop "Binary: any GRFP success (proposed institution) (94-14)"
drop _merge

* 2005-2008 window: 1 if the id_proposed_uni_yr indicator equals 1 in any of the four years
generate any_grfp_prop0508 = 0
forvalues yr = 2005/2008 {
	replace any_grfp_prop0508 = 1 if id_proposed_uni_yr`yr' == 1
}
label variable any_grfp_prop0508 "Binary: any GRFP success (proposed institution) 05-08"

* Award flag is defined only for departments flagged above (missing otherwise):
* 0 by default, 1 if any yearly award count 2005-2008 is positive
generate any_award_grfp_prop0508 = 0 if any_grfp_prop0508 == 1
forvalues yr = 2005/2008 {
	replace any_award_grfp_prop0508 = 1 if grfp_award`yr' > 0 & grfp_award`yr' != .
}
label variable any_award_grfp_prop0508 "Binary: any GRFP award (proposed institution) 05-08" 

* Total awards 2005-2008; a total of zero is stored as missing
egen award_count_prop0508 = rowtotal(grfp_award2005 grfp_award2006 grfp_award2007 grfp_award2008)
replace award_count_prop0508 = . if award_count_prop0508 == 0
label variable award_count_prop0508 "Award Count 0508 (proposed institution)"

summarize any_grfp_* any_award_grfp award_count
save "$dir/NRC 05 Survey/NRC GRFP IPEDS dept cross-section_proposed.dta", replace

********************************************************************************
* Current institution: load the de-duplicated NRC department file and attach the
* GRFP current-institution records, matching one-to-one on id_merge
set more off
clear all
use "$dir/NRC 05 Survey/NRC IPEDS dept cross-section no dups ready to merge.dta"
sort id_merge
merge 1:1 id_merge using ///
	"$dir/GRFP/GRFP current institution reshaped.dta"
*RESULTS* 3339 unique departments; 4017 in using (=1787+2230)
*_merge == 3* 1787 matched
*_merge == 2* 2230 from using not matched (55.51% of GRFP recipients not accounted for in the NRC survey) =2230/4017
*_merge == 1* 1552 from master not matched (46.48% of S&E-based NRC recipients did not receive a GRFP award) =1552/3339

****************************************
/*** Brief aside: assessment of the GRFP departments that did not match (_merge == 2) 
keep if _merge == 2
set more off
br id_merge
drop _merge
*split up id_merge 9 digit double variable: first 6 digits IPEDS id and last 2 digits field
gen assess_id = id_merge
tostring assess_id, generate(str_assess_id)
gen ipeds_uni_backout = substr(str_assess_id,1,6)
gen ipeds_dept_backout = substr(str_assess_id,-2,2)
destring ipeds_uni_backout ipeds_dept_backout, replace
rename ipeds_id ipeds_id1
rename ipeds_uni_backout ipeds_id
merge m:m ipeds_id using "$dir/IPEDS/IPEDS list clean round2.dta"
keep if _merge == 3
drop _merge
rename ipeds_dept_backout grfp_dept_id
sort grfp_dept_id
merge m:1 grfp_dept_id using "$dir/GRFP/GRFP dept list for vetting data.dta"
tab nrc_dept_string
tab uni_clns_to_match
drop dup
sort uni_clns_to_match
quietly by uni_clns_to_match:  gen dup = cond(_N==1,0,_n)
count if dup == 1 | dup == 0
** Primary fields (6): Public Affairs, Public Policy and Public Administration; Operations Research, Systems Engineering and Industrial Engineering; Mathematics; Engineering Science and Materials (not elsewhere classified); Communication; Biochemistry, Biophysics, and Structural Biology
** Those where _merge == 2 come from a large range of universities (513). 
****************************************/

* Keep NRC departments only (drop GRFP-only records); the merge indicator is not used further
drop if _merge == 2
drop _merge

* 2005-2014 window: 1 if the id_cur_uni_yr indicator equals 1 in any year
generate any_grfp_cur = 0
forvalues yr = 2005/2014 {
	replace any_grfp_cur = 1 if id_cur_uni_yr`yr' == 1
}
label variable any_grfp_cur "Binary: any GRFP success (current institution) 05-14"

* Retain only the merge key and the current-institution GRFP variables
* (mention count, award count and indicator for each year 2005-2014)
local cur_keep id_merge
forvalues yr = 2005/2014 {
	local cur_keep `cur_keep' grfp_cur_mention`yr' grfp_cur_award`yr' id_cur_uni_yr`yr'
}
keep `cur_keep' id_current_uni any_grfp_cur

* 2005-2008 window: 1 if the id_cur_uni_yr indicator equals 1 in any of the four years
generate any_grfp_cur0508 = 0
forvalues yr = 2005/2008 {
	replace any_grfp_cur0508 = 1 if id_cur_uni_yr`yr' == 1
}
label variable any_grfp_cur0508 "Binary: any GRFP success (current institution) 05-08"

* Award flag is defined only for departments flagged above (missing otherwise):
* 0 by default, 1 if any yearly award count 2005-2008 is positive
generate any_award_grfp_cur0508 = 0 if any_grfp_cur0508 == 1
forvalues yr = 2005/2008 {
	replace any_award_grfp_cur0508 = 1 if grfp_cur_award`yr' > 0 & grfp_cur_award`yr' != .
}
label variable any_award_grfp_cur0508 "Binary: any GRFP award (current institution) 05-08" 

* Total awards 2005-2008; a total of zero is stored as missing
egen award_count_cur0508 = rowtotal(grfp_cur_award2005 grfp_cur_award2006 grfp_cur_award2007 grfp_cur_award2008)
replace award_count_cur0508 = . if award_count_cur0508 == 0
label variable award_count_cur0508 "Award Count 0508 (current institution)"

summarize any_grfp_cur* any_award_grfp award_count
save "$dir/NRC 05 Survey/NRC GRFP IPEDS dept cross-section_current.dta", replace

********************************************************************************
* No-change institution (current = proposed): load the de-duplicated NRC department
* file and attach the GRFP no-change records, matching one-to-one on id_merge
set more off
clear all
use "$dir/NRC 05 Survey/NRC IPEDS dept cross-section no dups ready to merge.dta"
sort id_merge
merge 1:1 id_merge using ///
	"$dir/GRFP/GRFP nochange institution reshaped.dta"
*RESULTS* 3339 unique departments; 2941 using (=1642+1299)
*_merge == 3* 1642 matched
*_merge == 2* 1299 from using not matched (44.17% of GRFP recipients not accounted for in the NRC survey) =1299/2941
*_merge == 1* 1697 from master not matched (50.82% of S&E-based NRC recipients did not receive a GRFP award) =1697/3339

****************************************
/*** Brief aside: assessment of the GRFP departments that did not match (_merge == 2) 
keep if _merge == 2
set more off
br id_merge
drop _merge
*split up id_merge 9 digit double variable: first 6 digits IPEDS id and last 2 digits field
gen assess_id = id_merge
tostring assess_id, generate(str_assess_id)
gen ipeds_uni_backout = substr(str_assess_id,1,6)
gen ipeds_dept_backout = substr(str_assess_id,-2,2)
destring ipeds_uni_backout ipeds_dept_backout, replace
rename ipeds_id ipeds_id1
rename ipeds_uni_backout ipeds_id
merge m:m ipeds_id using "$dir/IPEDS/IPEDS list clean round2.dta"
keep if _merge == 3
drop _merge
rename ipeds_dept_backout grfp_dept_id
sort grfp_dept_id
merge m:1 grfp_dept_id using "$dir/GRFP/GRFP dept list for vetting data.dta"
tab nrc_dept_string
tab uni_clns_to_match
drop dup
sort uni_clns_to_match
quietly by uni_clns_to_match:  gen dup = cond(_N==1,0,_n)
count if dup == 1 | dup == 0
** Primary fields (6): Public Affairs, Public Policy and Public Administration; Operations Research, Systems Engineering and Industrial Engineering; Mathematics; Engineering Science and Materials (not elsewhere classified); Communication; Biochemistry, Biophysics, and Structural Biology
** Those where _merge == 2 come from a large range of universities (310). 
****************************************/

* Keep NRC departments only (drop GRFP-only records); the merge indicator is not used further
drop if _merge == 2
drop _merge

* 2005-2014 window: 1 if the id_nochange_uni_yr indicator equals 1 in any year
generate any_grfp_nochange = 0
forvalues yr = 2005/2014 {
	replace any_grfp_nochange = 1 if id_nochange_uni_yr`yr' == 1
}
label variable any_grfp_nochange "Binary: any GRFP success (cur=pro institution) 05-14"

* Retain only the merge key and the no-change GRFP variables
* (mention count, award count and indicator for each year 2005-2014)
local nochange_keep id_merge
forvalues yr = 2005/2014 {
	local nochange_keep `nochange_keep' grfp_nochange_mention`yr' grfp_nochange_award`yr' id_nochange_uni_yr`yr'
}
keep `nochange_keep' id_nochange_uni any_grfp_nochange

* 2005-2008 window: 1 if the id_nochange_uni_yr indicator equals 1 in any of the four years
generate any_grfp_nochange0508 = 0
forvalues yr = 2005/2008 {
	replace any_grfp_nochange0508 = 1 if id_nochange_uni_yr`yr' == 1
}
label variable any_grfp_nochange0508 "Binary: any GRFP success (cur=pro institution) 05-08"

* Award flag is defined only for departments flagged above (missing otherwise):
* 0 by default, 1 if any yearly award count 2005-2008 is positive
generate any_award_grfp_nochange0508 = 0 if any_grfp_nochange0508 == 1
forvalues yr = 2005/2008 {
	replace any_award_grfp_nochange0508 = 1 if grfp_nochange_award`yr' > 0 & grfp_nochange_award`yr' != .
}
label variable any_award_grfp_nochange0508 "Binary: any GRFP award (cur=pro institution) 05-08" 

* Total awards and total mentions 2005-2008; totals of zero are stored as missing
egen award_count_nochange0508 = rowtotal(grfp_nochange_award2005 grfp_nochange_award2006 grfp_nochange_award2007 grfp_nochange_award2008)
replace award_count_nochange0508 = . if award_count_nochange0508 == 0
label variable award_count_nochange0508 "Award Count 0508 (cur=pro institution)"
egen mention_count_nochange0508 = rowtotal(grfp_nochange_mention2005 grfp_nochange_mention2006 grfp_nochange_mention2007 grfp_nochange_mention2008)
replace mention_count_nochange0508 = . if mention_count_nochange0508 == 0
label variable mention_count_nochange0508 "Mention Count 0508 (cur=pro institution)"

summarize any_grfp_* any_award_grfp award_count mention_count

save "$dir/NRC 05 Survey/NRC GRFP IPEDS dept cross-section_nochange.dta", replace
}
***** STEP 9: MERGE NRC & NSF GRFP Data
{
* Combine the three department-level files built in STEP 8 (proposed, current and
* no-change institution) into one cross-section, matching one-to-one on id_merge.
* The proposed-institution file is the master; nogenerate suppresses the _merge
* indicator so that the second merge can run without dropping it first.
set more off
clear all
use "$dir/NRC 05 Survey/NRC GRFP IPEDS dept cross-section_proposed.dta"
sort id_merge
merge 1:1 id_merge using ///
	"$dir/NRC 05 Survey/NRC GRFP IPEDS dept cross-section_current.dta", nogenerate
merge 1:1 id_merge using ///
	"$dir/NRC 05 Survey/NRC GRFP IPEDS dept cross-section_nochange.dta", nogenerate
save "$dir/NRC 05 Survey/NRC GRFP IPEDS dept cross-section.dta", replace
}


********************************************************************************
